Fix emulator build error
[platform/framework/web/chromium-efl.git] / components / browsing_topics / browsing_topics_service_impl.cc
1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/browsing_topics/browsing_topics_service_impl.h"
6
7 #include <random>
8 #include <vector>
9
10 #include "base/functional/bind.h"
11 #include "base/metrics/histogram_functions.h"
12 #include "base/notreached.h"
13 #include "base/rand_util.h"
14 #include "base/ranges/algorithm.h"
15 #include "base/strings/strcat.h"
16 #include "base/time/time.h"
17 #include "components/browsing_topics/browsing_topics_calculator.h"
18 #include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
19 #include "components/browsing_topics/common/common_types.h"
20 #include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
21 #include "components/browsing_topics/util.h"
22 #include "components/privacy_sandbox/canonical_topic.h"
23 #include "content/public/browser/browsing_topics_site_data_manager.h"
24 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
25 #include "services/metrics/public/cpp/ukm_builders.h"
26 #include "services/metrics/public/cpp/ukm_recorder.h"
27 #include "third_party/blink/public/common/features.h"
28 #include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
29
30 namespace browsing_topics {
31
32 namespace {
33
34 // Returns whether the topics should all be cleared given
35 // `browsing_topics_data_accessible_since` and `is_topic_allowed_by_settings`.
36 // Returns true if `browsing_topics_data_accessible_since` is greater than the
37 // last calculation time.
38 bool ShouldClearTopicsOnStartup(
39     const BrowsingTopicsState& browsing_topics_state,
40     base::Time browsing_topics_data_accessible_since) {
41   if (browsing_topics_state.epochs().empty()) {
42     return false;
43   }
44
45   // Here we rely on the fact that `browsing_topics_data_accessible_since` can
46   // only be updated to base::Time::Now() due to data deletion. So we'll either
47   // need to clear all topics data, or no-op. If this assumption no longer
48   // holds, we'd need to iterate over all epochs, check their calculation time,
49   // and selectively delete the epochs.
50   if (browsing_topics_data_accessible_since >
51       browsing_topics_state.epochs().back().calculation_time()) {
52     return true;
53   }
54
55   return false;
56 }
57
58 // Returns a vector of top topics which are disallowed and thus should be
59 // cleared. This could happen if the topic became disallowed when
60 // `browsing_topics_state` was still loading (and we didn't get a chance to
61 // clear it).
62 std::vector<privacy_sandbox::CanonicalTopic> TopTopicsToClearOnStartup(
63     const BrowsingTopicsState& browsing_topics_state,
64     base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
65         is_topic_allowed_by_settings) {
66   DCHECK(!is_topic_allowed_by_settings.is_null());
67   std::vector<privacy_sandbox::CanonicalTopic> top_topics_to_clear;
68   for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
69     for (const TopicAndDomains& topic_and_domains :
70          epoch.top_topics_and_observing_domains()) {
71       if (!topic_and_domains.IsValid()) {
72         continue;
73       }
74       privacy_sandbox::CanonicalTopic canonical_topic =
75           privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
76                                           epoch.taxonomy_version());
77       if (!is_topic_allowed_by_settings.Run(canonical_topic)) {
78         top_topics_to_clear.emplace_back(canonical_topic);
79       }
80     }
81   }
82   return top_topics_to_clear;
83 }
84
85 struct StartupCalculateDecision {
86   bool clear_all_topics_data = true;
87   base::TimeDelta next_calculation_delay;
88   std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
89 };
90
91 StartupCalculateDecision GetStartupCalculationDecision(
92     const BrowsingTopicsState& browsing_topics_state,
93     base::Time browsing_topics_data_accessible_since,
94     base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
95         is_topic_allowed_by_settings) {
96   // The topics have never been calculated. This could happen with a fresh
97   // profile or the if the config has updated. In case of a config update, the
98   // topics should have already been cleared when initializing the
99   // `BrowsingTopicsState`.
100   if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
101     return StartupCalculateDecision{.clear_all_topics_data = false,
102                                     .next_calculation_delay = base::TimeDelta(),
103                                     .topics_to_clear = {}};
104   }
105
106   // This could happen when clear-on-exit is turned on and has caused the
107   // cookies to be deleted on startup
108   bool should_clear_all_topics_data = ShouldClearTopicsOnStartup(
109       browsing_topics_state, browsing_topics_data_accessible_since);
110
111   std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
112   if (!should_clear_all_topics_data) {
113     topics_to_clear = TopTopicsToClearOnStartup(browsing_topics_state,
114                                                 is_topic_allowed_by_settings);
115   }
116
117   base::TimeDelta presumed_next_calculation_delay =
118       browsing_topics_state.next_scheduled_calculation_time() -
119       base::Time::Now();
120
121   // The scheduled calculation time was reached before the startup.
122   if (presumed_next_calculation_delay <= base::TimeDelta()) {
123     return StartupCalculateDecision{
124         .clear_all_topics_data = should_clear_all_topics_data,
125         .next_calculation_delay = base::TimeDelta(),
126         .topics_to_clear = topics_to_clear};
127   }
128
129   // This could happen if the machine time has changed since the last
130   // calculation. Recalculate immediately to align with the expected schedule
131   // rather than potentially stop computing for a very long time.
132   if (presumed_next_calculation_delay >=
133       2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
134     return StartupCalculateDecision{
135         .clear_all_topics_data = should_clear_all_topics_data,
136         .next_calculation_delay = base::TimeDelta(),
137         .topics_to_clear = topics_to_clear};
138   }
139
140   return StartupCalculateDecision{
141       .clear_all_topics_data = should_clear_all_topics_data,
142       .next_calculation_delay = presumed_next_calculation_delay,
143       .topics_to_clear = topics_to_clear};
144 }
145
146 void RecordBrowsingTopicsApiResultMetrics(ApiAccessResult result,
147                                           content::RenderFrameHost* main_frame,
148                                           bool is_get_topics_request) {
149   // The `BrowsingTopics_DocumentBrowsingTopicsApiResult2` event is only
150   // recorded for request that gets the topics.
151   if (!is_get_topics_request) {
152     return;
153   }
154
155   base::UmaHistogramEnumeration("BrowsingTopics.Result.Status", result);
156
157   if (result == browsing_topics::ApiAccessResult::kSuccess) {
158     return;
159   }
160
161   CHECK(!main_frame->IsInLifecycleState(
162       content::RenderFrameHost::LifecycleState::kPrerendering));
163   ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
164   ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
165       main_frame->GetPageUkmSourceId());
166   builder.SetFailureReason(static_cast<int64_t>(result));
167
168   builder.Record(ukm_recorder->Get());
169 }
170
171 void RecordBrowsingTopicsApiResultMetrics(
172     const std::vector<CandidateTopic>& valid_candidate_topics,
173     content::RenderFrameHost* main_frame) {
174   CHECK(!main_frame->IsInLifecycleState(
175       content::RenderFrameHost::LifecycleState::kPrerendering));
176   ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
177   ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
178       main_frame->GetPageUkmSourceId());
179
180   int real_count = 0;
181   int fake_count = 0;
182   int filtered_count = 0;
183
184   for (size_t i = 0; i < 3u && valid_candidate_topics.size() > i; ++i) {
185     const CandidateTopic& candidate_topic = valid_candidate_topics[i];
186
187     DCHECK(candidate_topic.IsValid());
188
189     if (candidate_topic.should_be_filtered()) {
190       filtered_count += 1;
191     } else {
192       candidate_topic.is_true_topic() ? real_count += 1 : fake_count += 1;
193     }
194
195     if (i == 0) {
196       builder.SetCandidateTopic0(candidate_topic.topic().value())
197           .SetCandidateTopic0IsTrueTopTopic(candidate_topic.is_true_topic())
198           .SetCandidateTopic0ShouldBeFiltered(
199               candidate_topic.should_be_filtered())
200           .SetCandidateTopic0TaxonomyVersion(candidate_topic.taxonomy_version())
201           .SetCandidateTopic0ModelVersion(candidate_topic.model_version());
202     } else if (i == 1) {
203       builder.SetCandidateTopic1(candidate_topic.topic().value())
204           .SetCandidateTopic1IsTrueTopTopic(candidate_topic.is_true_topic())
205           .SetCandidateTopic1ShouldBeFiltered(
206               candidate_topic.should_be_filtered())
207           .SetCandidateTopic1TaxonomyVersion(candidate_topic.taxonomy_version())
208           .SetCandidateTopic1ModelVersion(candidate_topic.model_version());
209     } else {
210       DCHECK_EQ(i, 2u);
211       builder.SetCandidateTopic2(candidate_topic.topic().value())
212           .SetCandidateTopic2IsTrueTopTopic(candidate_topic.is_true_topic())
213           .SetCandidateTopic2ShouldBeFiltered(
214               candidate_topic.should_be_filtered())
215           .SetCandidateTopic2TaxonomyVersion(candidate_topic.taxonomy_version())
216           .SetCandidateTopic2ModelVersion(candidate_topic.model_version());
217     }
218   }
219
220   const int kBuckets = 10;
221   DCHECK_GE(kBuckets,
222             blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());
223
224   base::UmaHistogramExactLinear("BrowsingTopics.Result.RealTopicCount",
225                                 real_count, kBuckets);
226   base::UmaHistogramExactLinear("BrowsingTopics.Result.FakeTopicCount",
227                                 fake_count, kBuckets);
228   base::UmaHistogramExactLinear("BrowsingTopics.Result.FilteredTopicCount",
229                                 filtered_count, kBuckets);
230
231   builder.Record(ukm_recorder->Get());
232 }
233
// The kind of action a Topics API request performs, as reported to the
// "BrowsingTopics.ApiActionType" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class BrowsingTopicsApiActionType {
  // document.browsingTopics({skipObservation: true}): topics are returned but
  // the call is not observed.
  kGetViaDocumentApi = 0,

  // document.browsingTopics(): topics are returned and the call is observed.
  kGetAndObserveViaDocumentApi = 1,

  // fetch(<url>, {browsingTopics: true}), or the analogous XHR request: topics
  // are attached to the request.
  kGetViaFetchLikeApi = 2,

  // Observation triggered by the "Sec-Browsing-Topics: ?1" response header of
  // a fetch(<url>, {browsingTopics: true}) request, or of the analogous XHR
  // request.
  kObserveViaFetchLikeApi = 3,

  // <iframe src=[url] browsingtopics>: topics are attached to the request.
  kGetViaIframeAttributeApi = 4,

  // Observation triggered by the "Sec-Browsing-Topics: ?1" response header of
  // an <iframe src=[url] browsingtopics> request.
  kObserveViaIframeAttributeApi = 5,

  kMaxValue = kObserveViaIframeAttributeApi,
};
263
264 void RecordBrowsingTopicsApiActionTypeMetrics(ApiCallerSource caller_source,
265                                               bool get_topics,
266                                               bool observe) {
267   static constexpr char kBrowsingTopicsApiActionTypeHistogramId[] =
268       "BrowsingTopics.ApiActionType";
269
270   if (caller_source == ApiCallerSource::kJavaScript) {
271     DCHECK(get_topics);
272
273     if (!observe) {
274       base::UmaHistogramEnumeration(
275           kBrowsingTopicsApiActionTypeHistogramId,
276           BrowsingTopicsApiActionType::kGetViaDocumentApi);
277       return;
278     }
279
280     base::UmaHistogramEnumeration(
281         kBrowsingTopicsApiActionTypeHistogramId,
282         BrowsingTopicsApiActionType::kGetAndObserveViaDocumentApi);
283
284     return;
285   }
286
287   if (caller_source == ApiCallerSource::kIframeAttribute) {
288     if (get_topics) {
289       DCHECK(!observe);
290
291       base::UmaHistogramEnumeration(
292           kBrowsingTopicsApiActionTypeHistogramId,
293           BrowsingTopicsApiActionType::kGetViaIframeAttributeApi);
294       return;
295     }
296
297     DCHECK(observe);
298     base::UmaHistogramEnumeration(
299         kBrowsingTopicsApiActionTypeHistogramId,
300         BrowsingTopicsApiActionType::kObserveViaIframeAttributeApi);
301
302     return;
303   }
304
305   DCHECK_EQ(caller_source, ApiCallerSource::kFetch);
306
307   if (get_topics) {
308     DCHECK(!observe);
309
310     base::UmaHistogramEnumeration(
311         kBrowsingTopicsApiActionTypeHistogramId,
312         BrowsingTopicsApiActionType::kGetViaFetchLikeApi);
313     return;
314   }
315
316   DCHECK(observe);
317   base::UmaHistogramEnumeration(
318       kBrowsingTopicsApiActionTypeHistogramId,
319       BrowsingTopicsApiActionType::kObserveViaFetchLikeApi);
320 }
321
322 std::set<HashedDomain> GetAllObservingDomains(
323     const BrowsingTopicsState& browsing_topics_state) {
324   std::set<HashedDomain> observing_domains;
325   for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
326     for (const auto& topic_and_domains :
327          epoch.top_topics_and_observing_domains()) {
328       observing_domains.insert(topic_and_domains.hashed_domains().begin(),
329                                topic_and_domains.hashed_domains().end());
330     }
331   }
332   return observing_domains;
333 }
334
335 }  // namespace
336
337 BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
338
339 BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
340     const base::FilePath& profile_path,
341     privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
342     history::HistoryService* history_service,
343     content::BrowsingTopicsSiteDataManager* site_data_manager,
344     std::unique_ptr<Annotator> annotator,
345     TopicAccessedCallback topic_accessed_callback)
346     : privacy_sandbox_settings_(privacy_sandbox_settings),
347       history_service_(history_service),
348       site_data_manager_(site_data_manager),
349       browsing_topics_state_(
350           profile_path,
351           base::BindOnce(
352               &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
353               base::Unretained(this))),
354       annotator_(std::move(annotator)),
355       topic_accessed_callback_(std::move(topic_accessed_callback)) {
356   DCHECK(topic_accessed_callback_);
357   privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
358   history_service_observation_.Observe(history_service);
359 }
360
361 bool BrowsingTopicsServiceImpl::HandleTopicsWebApi(
362     const url::Origin& context_origin,
363     content::RenderFrameHost* main_frame,
364     ApiCallerSource caller_source,
365     bool get_topics,
366     bool observe,
367     std::vector<blink::mojom::EpochTopicPtr>& topics) {
368   DCHECK(topics.empty());
369   DCHECK(get_topics || observe);
370
371   RecordBrowsingTopicsApiActionTypeMetrics(caller_source, get_topics, observe);
372
373   if (!browsing_topics_state_loaded_) {
374     RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kStateNotReady,
375                                          main_frame, get_topics);
376     return false;
377   }
378
379   if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
380     RecordBrowsingTopicsApiResultMetrics(
381         ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
382     return false;
383   }
384
385   if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
386           /*top_frame_origin=*/main_frame->GetLastCommittedOrigin(),
387           context_origin.GetURL(), main_frame)) {
388     RecordBrowsingTopicsApiResultMetrics(
389         ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
390     return false;
391   }
392
393   RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kSuccess, main_frame,
394                                        get_topics);
395
396   std::string context_domain =
397       net::registry_controlled_domains::GetDomainAndRegistry(
398           context_origin.GetURL(),
399           net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
400
401   HashedDomain hashed_context_domain = HashContextDomainForStorage(
402       browsing_topics_state_.hmac_key(), context_domain);
403
404   if (observe) {
405     // Track the API usage context after the permissions check.
406     BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
407         ->OnBrowsingTopicsApiUsed(hashed_context_domain, context_domain,
408                                   history_service_);
409   }
410
411   if (!get_topics) {
412     return true;
413   }
414
415   std::string top_domain =
416       net::registry_controlled_domains::GetDomainAndRegistry(
417           main_frame->GetLastCommittedOrigin().GetURL(),
418           net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
419
420   std::vector<CandidateTopic> valid_candidate_topics;
421
422   for (const EpochTopics* epoch :
423        browsing_topics_state_.EpochsForSite(top_domain)) {
424     CandidateTopic candidate_topic = epoch->CandidateTopicForSite(
425         top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
426
427     if (!candidate_topic.IsValid()) {
428       continue;
429     }
430
431     // Although a top topic can never be in the disallowed state, the returned
432     // `candidate_topic` may be the random one. Thus we still need this check.
433     if (!privacy_sandbox_settings_->IsTopicAllowed(
434             privacy_sandbox::CanonicalTopic(
435                 candidate_topic.topic(), candidate_topic.taxonomy_version()))) {
436       DCHECK(!candidate_topic.is_true_topic());
437       continue;
438     }
439
440     valid_candidate_topics.push_back(std::move(candidate_topic));
441   }
442
443   RecordBrowsingTopicsApiResultMetrics(valid_candidate_topics, main_frame);
444
445   for (const CandidateTopic& candidate_topic : valid_candidate_topics) {
446     if (candidate_topic.should_be_filtered()) {
447       continue;
448     }
449
450     // `PageSpecificContentSettings` should only observe true top topics
451     // accessed on the page. It's okay to notify the same topic multiple
452     // times even though duplicate topics will be removed in the end.
453     if (candidate_topic.is_true_topic()) {
454       privacy_sandbox::CanonicalTopic canonical_topic(
455           candidate_topic.topic(), candidate_topic.taxonomy_version());
456       topic_accessed_callback_.Run(main_frame, context_origin,
457                                    /*blocked_by_policy=*/false,
458                                    canonical_topic);
459     }
460
461     auto result_topic = blink::mojom::EpochTopic::New();
462     result_topic->topic = candidate_topic.topic().value();
463     result_topic->config_version =
464         base::StrCat({"chrome.", base::NumberToString(CurrentConfigVersion())});
465     result_topic->model_version =
466         base::NumberToString(candidate_topic.model_version());
467     result_topic->taxonomy_version =
468         base::NumberToString(candidate_topic.taxonomy_version());
469     result_topic->version = base::StrCat({result_topic->config_version, ":",
470                                           result_topic->taxonomy_version, ":",
471                                           result_topic->model_version});
472     topics.emplace_back(std::move(result_topic));
473   }
474
475   // Sort result based on the version first, and then based on the topic ID.
476   // This groups the topics with the same version together, so that when
477   // transforming into the header format, all duplicate versions can be omitted.
478   std::sort(topics.begin(), topics.end(),
479             [](const blink::mojom::EpochTopicPtr& left,
480                const blink::mojom::EpochTopicPtr& right) {
481               if (left->version != right->version) {
482                 return left->version < right->version;
483               }
484
485               return left->topic < right->topic;
486             });
487
488   // Remove duplicate entries.
489   topics.erase(std::unique(topics.begin(), topics.end()), topics.end());
490
491   return true;
492 }
493
494 int BrowsingTopicsServiceImpl::NumVersionsInEpochs(
495     const url::Origin& main_frame_origin) const {
496   CHECK(browsing_topics_state_loaded_);
497   CHECK(privacy_sandbox_settings_->IsTopicsAllowed());
498
499   std::string main_frame_domain =
500       net::registry_controlled_domains::GetDomainAndRegistry(
501           main_frame_origin.GetURL(),
502           net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
503
504   std::set<std::pair<int, int64_t>> distinct_versions;
505   for (const EpochTopics* epoch :
506        browsing_topics_state_.EpochsForSite(main_frame_domain)) {
507     if (epoch->HasValidVersions()) {
508       distinct_versions.emplace(epoch->taxonomy_version(),
509                                 epoch->model_version());
510     }
511   }
512
513   return distinct_versions.size();
514 }
515
516 void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
517     bool calculate_now,
518     mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
519   if (!browsing_topics_state_loaded_) {
520     std::move(callback).Run(
521         mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
522             "State loading hasn't finished. Please retry shortly."));
523     return;
524   }
525
526   // If a calculation is already in progress, get the webui topics state after
527   // the calculation is done. Do this regardless of whether `calculate_now` is
528   // true, i.e. if `calculate_now` is true, this request is effectively merged
529   // with the in progress calculation.
530   if (topics_calculator_) {
531     get_state_for_webui_callbacks_.push_back(std::move(callback));
532     return;
533   }
534
535   DCHECK(schedule_calculate_timer_.IsRunning());
536
537   if (calculate_now) {
538     get_state_for_webui_callbacks_.push_back(std::move(callback));
539     schedule_calculate_timer_.AbandonAndStop();
540     CalculateBrowsingTopics(/*is_manually_triggered=*/true);
541     return;
542   }
543
544   site_data_manager_->GetContextDomainsFromHashedContextDomains(
545       GetAllObservingDomains(browsing_topics_state_),
546       base::BindOnce(
547           &BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper,
548           weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
549 }
550
551 std::vector<privacy_sandbox::CanonicalTopic>
552 BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
553   if (!browsing_topics_state_loaded_) {
554     return {};
555   }
556
557   std::vector<privacy_sandbox::CanonicalTopic> result;
558
559   for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
560     DCHECK_LE(epoch.padded_top_topics_start_index(),
561               epoch.top_topics_and_observing_domains().size());
562
563     for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
564       const TopicAndDomains& topic_and_domains =
565           epoch.top_topics_and_observing_domains()[i];
566
567       if (!topic_and_domains.IsValid()) {
568         continue;
569       }
570
571       // A top topic can never be in the disallowed state (i.e. it will be
572       // cleared when it becomes diallowed).
573       DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
574           privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
575                                           epoch.taxonomy_version())));
576
577       result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
578     }
579   }
580
581   return result;
582 }
583
584 Annotator* BrowsingTopicsServiceImpl::GetAnnotator() {
585   return annotator_.get();
586 }
587
588 void BrowsingTopicsServiceImpl::ClearTopic(
589     const privacy_sandbox::CanonicalTopic& canonical_topic) {
590   if (!browsing_topics_state_loaded_) {
591     return;
592   }
593
594   browsing_topics_state_.ClearTopic(canonical_topic.topic_id());
595 }
596
597 void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
598     const url::Origin& origin) {
599   if (!browsing_topics_state_loaded_) {
600     return;
601   }
602
603   std::string context_domain =
604       net::registry_controlled_domains::GetDomainAndRegistry(
605           origin.GetURL(),
606           net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
607
608   HashedDomain hashed_context_domain = HashContextDomainForStorage(
609       browsing_topics_state_.hmac_key(), context_domain);
610
611   browsing_topics_state_.ClearContextDomain(hashed_context_domain);
612   site_data_manager_->ClearContextDomain(hashed_context_domain);
613 }
614
615 void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
616   if (!browsing_topics_state_loaded_) {
617     return;
618   }
619
620   browsing_topics_state_.ClearAllTopics();
621   site_data_manager_->ExpireDataBefore(base::Time::Now());
622 }
623
624 std::unique_ptr<BrowsingTopicsCalculator>
625 BrowsingTopicsServiceImpl::CreateCalculator(
626     privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
627     history::HistoryService* history_service,
628     content::BrowsingTopicsSiteDataManager* site_data_manager,
629     Annotator* annotator,
630     const base::circular_deque<EpochTopics>& epochs,
631     bool is_manually_triggered,
632     BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
633   return std::make_unique<BrowsingTopicsCalculator>(
634       privacy_sandbox_settings, history_service, site_data_manager, annotator,
635       epochs, is_manually_triggered, std::move(callback));
636 }
637
638 const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
639   return browsing_topics_state_;
640 }
641
642 void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
643     base::TimeDelta delay) {
644   DCHECK(browsing_topics_state_loaded_);
645
646   // `this` owns the timer, which is automatically cancelled on destruction, so
647   // base::Unretained(this) is safe.
648   schedule_calculate_timer_.Start(
649       FROM_HERE, delay,
650       base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
651                      base::Unretained(this), /*is_manually_triggered=*/false));
652 }
653
654 void BrowsingTopicsServiceImpl::CalculateBrowsingTopics(
655     bool is_manually_triggered) {
656   DCHECK(browsing_topics_state_loaded_);
657
658   DCHECK(!topics_calculator_);
659
660   // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
661   // the callback once it's destroyed.
662   topics_calculator_ = CreateCalculator(
663       privacy_sandbox_settings_, history_service_, site_data_manager_,
664       annotator_.get(), browsing_topics_state_.epochs(), is_manually_triggered,
665       base::BindOnce(
666           &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
667           base::Unretained(this)));
668 }
669
670 void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
671     EpochTopics epoch_topics) {
672   DCHECK(browsing_topics_state_loaded_);
673
674   DCHECK(topics_calculator_);
675   topics_calculator_.reset();
676
677   if (!browsing_topics_state_.epochs().empty()) {
678     // Use 24 days as the max value, because 24 days is the maximum number of
679     // days that works with UmaHistogramCustomTimes due to its conversion of
680     // times into milliseconds. We expect most values to be around
681     // `kBrowsingTopicsTimePeriodPerEpoch`.
682     base::UmaHistogramCustomTimes(
683         "BrowsingTopics.EpochTopicsCalculation.TimeBetweenCalculations",
684         epoch_topics.calculation_time() -
685             browsing_topics_state_.epochs().back().calculation_time(),
686         /*min=*/base::Seconds(1), /*max=*/base::Days(24), /*buckets=*/100);
687   }
688
689   absl::optional<EpochTopics> maybe_removed_epoch =
690       browsing_topics_state_.AddEpoch(std::move(epoch_topics));
691   if (maybe_removed_epoch.has_value()) {
692     site_data_manager_->ExpireDataBefore(
693         maybe_removed_epoch->calculation_time() -
694         blink::features::
695                 kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering
696                     .Get() *
697             blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
698   }
699   browsing_topics_state_.UpdateNextScheduledCalculationTime();
700
701   ScheduleBrowsingTopicsCalculation(
702       blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
703
704   for (auto& callback : get_state_for_webui_callbacks_) {
705     site_data_manager_->GetContextDomainsFromHashedContextDomains(
706         GetAllObservingDomains(browsing_topics_state_),
707         base::BindOnce(
708             &BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper,
709             weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
710   }
711   get_state_for_webui_callbacks_.clear();
712 }
713
714 void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
715   DCHECK(!browsing_topics_state_loaded_);
716   browsing_topics_state_loaded_ = true;
717
718   base::Time browsing_topics_data_sccessible_since =
719       privacy_sandbox_settings_->TopicsDataAccessibleSince();
720
721   StartupCalculateDecision decision = GetStartupCalculationDecision(
722       browsing_topics_state_, browsing_topics_data_sccessible_since,
723       base::BindRepeating(
724           &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
725           base::Unretained(privacy_sandbox_settings_)));
726
727   if (decision.clear_all_topics_data) {
728     browsing_topics_state_.ClearAllTopics();
729   } else if (!decision.topics_to_clear.empty()) {
730     for (const privacy_sandbox::CanonicalTopic& canonical_topic :
731          decision.topics_to_clear) {
732       browsing_topics_state_.ClearTopic(canonical_topic.topic_id());
733     }
734   }
735
736   site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
737
738   ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
739 }
740
741 void BrowsingTopicsServiceImpl::Shutdown() {
742   privacy_sandbox_settings_observation_.Reset();
743   history_service_observation_.Reset();
744 }
745
746 void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
747   if (!browsing_topics_state_loaded_) {
748     return;
749   }
750
751   // Here we rely on the fact that `browsing_topics_data_accessible_since` can
752   // only be updated to base::Time::Now() due to data deletion. In this case, we
753   // should just clear all topics.
754   browsing_topics_state_.ClearAllTopics();
755   site_data_manager_->ExpireDataBefore(
756       privacy_sandbox_settings_->TopicsDataAccessibleSince());
757
758   // Abort the outstanding topics calculation and restart immediately.
759   if (topics_calculator_) {
760     DCHECK(!schedule_calculate_timer_.IsRunning());
761
762     bool is_manually_triggered = topics_calculator_->is_manually_triggered();
763     topics_calculator_.reset();
764     CalculateBrowsingTopics(is_manually_triggered);
765   }
766 }
767
768 void BrowsingTopicsServiceImpl::OnURLsDeleted(
769     history::HistoryService* history_service,
770     const history::DeletionInfo& deletion_info) {
771   if (!browsing_topics_state_loaded_) {
772     return;
773   }
774
775   // Ignore invalid time_range.
776   if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid()) {
777     return;
778   }
779
780   for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
781     const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
782
783     if (epoch_topics.empty()) {
784       continue;
785     }
786
787     // The typical case is assumed here. We cannot always derive the original
788     // history start time, as the necessary data (e.g. its previous epoch's
789     // calculation time) may have been gone.
790     base::Time history_data_start_time =
791         epoch_topics.calculation_time() -
792         blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
793
794     bool time_range_overlap =
795         epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
796         history_data_start_time <= deletion_info.time_range().end();
797
798     if (time_range_overlap) {
799       browsing_topics_state_.ClearOneEpoch(i);
800     }
801   }
802
803   // If there's an outstanding topics calculation, abort and restart it.
804   if (topics_calculator_) {
805     DCHECK(!schedule_calculate_timer_.IsRunning());
806
807     bool is_manually_triggered = topics_calculator_->is_manually_triggered();
808     topics_calculator_.reset();
809     CalculateBrowsingTopics(is_manually_triggered);
810   }
811 }
812
813 void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper(
814     mojom::PageHandler::GetBrowsingTopicsStateCallback callback,
815     std::map<HashedDomain, std::string> hashed_to_unhashed_context_domains) {
816   DCHECK(browsing_topics_state_loaded_);
817   DCHECK(!topics_calculator_);
818
819   auto webui_state = mojom::WebUIBrowsingTopicsState::New();
820
821   webui_state->next_scheduled_calculation_time =
822       browsing_topics_state_.next_scheduled_calculation_time();
823
824   for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
825     DCHECK_LE(epoch.padded_top_topics_start_index(),
826               epoch.top_topics_and_observing_domains().size());
827
828     // Note: for a failed epoch calculation, the default zero-initialized values
829     // will be displayed in the Web UI.
830     auto webui_epoch = mojom::WebUIEpoch::New();
831     webui_epoch->calculation_time = epoch.calculation_time();
832     webui_epoch->model_version = base::NumberToString(epoch.model_version());
833     webui_epoch->taxonomy_version =
834         base::NumberToString(epoch.taxonomy_version());
835
836     for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
837          ++i) {
838       const TopicAndDomains& topic_and_domains =
839           epoch.top_topics_and_observing_domains()[i];
840
841       privacy_sandbox::CanonicalTopic canonical_topic =
842           privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
843                                           epoch.taxonomy_version());
844
845       std::vector<std::string> webui_observed_by_domains;
846       webui_observed_by_domains.reserve(
847           topic_and_domains.hashed_domains().size());
848       for (const HashedDomain& hashed_domain :
849            topic_and_domains.hashed_domains()) {
850         auto it = hashed_to_unhashed_context_domains.find(hashed_domain);
851         if (it != hashed_to_unhashed_context_domains.end()) {
852           webui_observed_by_domains.push_back(it->second);
853         } else {
854           // Default to the hashed value if we don't have the original.
855           webui_observed_by_domains.push_back(
856               base::NumberToString(hashed_domain.value()));
857         }
858       }
859
860       // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
861       // will be 0; if the topic is invalid, or if the taxonomy version isn't
862       // recognized by this Chrome binary, the output `topic_name` will be
863       // "Unknown".
864       auto webui_topic = mojom::WebUITopic::New();
865       webui_topic->topic_id = topic_and_domains.topic().value();
866       webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
867       webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
868       webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
869
870       webui_epoch->topics.push_back(std::move(webui_topic));
871     }
872
873     webui_state->epochs.push_back(std::move(webui_epoch));
874   }
875
876   // Reorder the epochs from latest to oldest.
877   base::ranges::reverse(webui_state->epochs);
878
879   std::move(callback).Run(
880       mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
881           std::move(webui_state)));
882 }
883
884 }  // namespace browsing_topics