1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/browsing_topics/browsing_topics_service_impl.h"
10 #include "base/functional/bind.h"
11 #include "base/metrics/histogram_functions.h"
12 #include "base/notreached.h"
13 #include "base/rand_util.h"
14 #include "base/ranges/algorithm.h"
15 #include "base/strings/strcat.h"
16 #include "base/time/time.h"
17 #include "components/browsing_topics/browsing_topics_calculator.h"
18 #include "components/browsing_topics/browsing_topics_page_load_data_tracker.h"
19 #include "components/browsing_topics/common/common_types.h"
20 #include "components/browsing_topics/mojom/browsing_topics_internals.mojom.h"
21 #include "components/browsing_topics/util.h"
22 #include "components/privacy_sandbox/canonical_topic.h"
23 #include "content/public/browser/browsing_topics_site_data_manager.h"
24 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
25 #include "services/metrics/public/cpp/ukm_builders.h"
26 #include "services/metrics/public/cpp/ukm_recorder.h"
27 #include "third_party/blink/public/common/features.h"
28 #include "third_party/blink/public/mojom/browsing_topics/browsing_topics.mojom.h"
30 namespace browsing_topics {
// Decides whether all stored topics should be dropped when the service starts.
// NOTE(review): the branch bodies and `return` statements are not visible in
// this view; the outcomes described below come from the original comment.
34 // Returns whether the topics should all be cleared given
35 // `browsing_topics_state` and `browsing_topics_data_accessible_since`.
36 // Returns true if `browsing_topics_data_accessible_since` is greater than the
37 // calculation time of the last epoch in `browsing_topics_state.epochs()`.
38 bool ShouldClearTopicsOnStartup(
39 const BrowsingTopicsState& browsing_topics_state,
40 base::Time browsing_topics_data_accessible_since) {
// With no epochs there is no calculation time to compare against.
41 if (browsing_topics_state.epochs().empty()) {
45 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
46 // only be updated to base::Time::Now() due to data deletion. So we'll either
47 // need to clear all topics data, or no-op. If this assumption no longer
48 // holds, we'd need to iterate over all epochs, check their calculation time,
49 // and selectively delete the epochs.
50 if (browsing_topics_data_accessible_since >
51 browsing_topics_state.epochs().back().calculation_time()) {
58 // Returns a vector of top topics which are disallowed and thus should be
59 // cleared. This could happen if the topic became disallowed when
60 // `browsing_topics_state` was still loading (and so could not be cleared then).
//
// `is_topic_allowed_by_settings` must be non-null; it is run once for each
// valid topic of each epoch, and topics for which it returns false are
// collected. The same topic may appear more than once in the result if it is
// present in several epochs.
62 std::vector<privacy_sandbox::CanonicalTopic> TopTopicsToClearOnStartup(
63 const BrowsingTopicsState& browsing_topics_state,
64 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
65 is_topic_allowed_by_settings) {
66 DCHECK(!is_topic_allowed_by_settings.is_null());
67 std::vector<privacy_sandbox::CanonicalTopic> top_topics_to_clear;
68 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
69 for (const TopicAndDomains& topic_and_domains :
70 epoch.top_topics_and_observing_domains()) {
// NOTE(review): the body of this branch is not visible here; presumably
// invalid entries are skipped.
71 if (!topic_and_domains.IsValid()) {
// The canonical topic pairs the topic id with the epoch's taxonomy version.
74 privacy_sandbox::CanonicalTopic canonical_topic =
75 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
76 epoch.taxonomy_version());
77 if (!is_topic_allowed_by_settings.Run(canonical_topic)) {
78 top_topics_to_clear.emplace_back(canonical_topic);
82 return top_topics_to_clear;
// Result of the startup decision made by `GetStartupCalculationDecision()`.
85 struct StartupCalculateDecision {
// Whether all topics data should be cleared. Defaults to true.
86 bool clear_all_topics_data = true;
// Delay to pass to the calculation scheduler; a zero delta is used when the
// calculation should not be postponed.
87 base::TimeDelta next_calculation_delay;
// Individual topics to clear when not everything is being cleared.
88 std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
// Computes what to do once the persisted state is available: whether to clear
// all topics, which individual topics to clear, and how long to wait before
// the next calculation.
91 StartupCalculateDecision GetStartupCalculationDecision(
92 const BrowsingTopicsState& browsing_topics_state,
93 base::Time browsing_topics_data_accessible_since,
94 base::RepeatingCallback<bool(const privacy_sandbox::CanonicalTopic&)>
95 is_topic_allowed_by_settings) {
96 // The topics have never been calculated. This could happen with a fresh
97 // profile or if the config has updated. In case of a config update, the
98 // topics should have already been cleared when initializing the
99 // `BrowsingTopicsState`.
100 if (browsing_topics_state.next_scheduled_calculation_time().is_null()) {
101 return StartupCalculateDecision{.clear_all_topics_data = false,
102 .next_calculation_delay = base::TimeDelta(),
103 .topics_to_clear = {}};
106 // This could happen when clear-on-exit is turned on and has caused the
107 // cookies to be deleted on startup.
108 bool should_clear_all_topics_data = ShouldClearTopicsOnStartup(
109 browsing_topics_state, browsing_topics_data_accessible_since);
// Individual topics only need to be looked at when not everything is cleared.
111 std::vector<privacy_sandbox::CanonicalTopic> topics_to_clear;
112 if (!should_clear_all_topics_data) {
113 topics_to_clear = TopTopicsToClearOnStartup(browsing_topics_state,
114 is_topic_allowed_by_settings);
// NOTE(review): the right-hand operand of this subtraction is not visible in
// this view; presumably it is the current time.
117 base::TimeDelta presumed_next_calculation_delay =
118 browsing_topics_state.next_scheduled_calculation_time() -
121 // The scheduled calculation time was reached before the startup.
122 if (presumed_next_calculation_delay <= base::TimeDelta()) {
123 return StartupCalculateDecision{
124 .clear_all_topics_data = should_clear_all_topics_data,
125 .next_calculation_delay = base::TimeDelta(),
126 .topics_to_clear = topics_to_clear};
129 // This could happen if the machine time has changed since the last
130 // calculation. Recalculate immediately to align with the expected schedule
131 // rather than potentially stop computing for a very long time.
132 if (presumed_next_calculation_delay >=
133 2 * blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()) {
134 return StartupCalculateDecision{
135 .clear_all_topics_data = should_clear_all_topics_data,
136 .next_calculation_delay = base::TimeDelta(),
137 .topics_to_clear = topics_to_clear};
// Otherwise keep the persisted schedule.
140 return StartupCalculateDecision{
141 .clear_all_topics_data = should_clear_all_topics_data,
142 .next_calculation_delay = presumed_next_calculation_delay,
143 .topics_to_clear = topics_to_clear};
// Records the outcome `result` of an API access: the
// "BrowsingTopics.Result.Status" UMA histogram and, for non-success results, a
// UKM event carrying the failure reason against the page of `main_frame`.
// NOTE(review): the early-exit statements of the two guards below are not
// visible in this view.
146 void RecordBrowsingTopicsApiResultMetrics(ApiAccessResult result,
147 content::RenderFrameHost* main_frame,
148 bool is_get_topics_request) {
149 // The `BrowsingTopics_DocumentBrowsingTopicsApiResult2` event is only
150 // recorded for requests that get the topics.
151 if (!is_get_topics_request) {
155 base::UmaHistogramEnumeration("BrowsingTopics.Result.Status", result);
// NOTE(review): presumably the success case leaves the UKM event to the
// candidate-topics overload of this function.
157 if (result == browsing_topics::ApiAccessResult::kSuccess) {
// `main_frame` must not be in the prerendering lifecycle state here.
161 CHECK(!main_frame->IsInLifecycleState(
162 content::RenderFrameHost::LifecycleState::kPrerendering));
163 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
164 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
165 main_frame->GetPageUkmSourceId());
166 builder.SetFailureReason(static_cast<int64_t>(result));
168 builder.Record(ukm_recorder->Get());
// Records metrics for the candidate topics of an API access: up to the first
// three entries of `valid_candidate_topics` are written to the
// `BrowsingTopics_DocumentBrowsingTopicsApiResult2` UKM event, and the
// real/fake/filtered counts are written to UMA histograms.
// NOTE(review): several lines of this function (the declarations of
// `real_count` / `fake_count`, and the conditions selecting which CandidateTopic
// slot is set) are not visible in this view.
171 void RecordBrowsingTopicsApiResultMetrics(
172 const std::vector<CandidateTopic>& valid_candidate_topics,
173 content::RenderFrameHost* main_frame) {
// `main_frame` must not be in the prerendering lifecycle state here.
174 CHECK(!main_frame->IsInLifecycleState(
175 content::RenderFrameHost::LifecycleState::kPrerendering));
176 ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
177 ukm::builders::BrowsingTopics_DocumentBrowsingTopicsApiResult2 builder(
178 main_frame->GetPageUkmSourceId());
182 int filtered_count = 0;
// Only the first three candidates are considered; the UKM event has slots
// CandidateTopic0..CandidateTopic2.
184 for (size_t i = 0; i < 3u && valid_candidate_topics.size() > i; ++i) {
185 const CandidateTopic& candidate_topic = valid_candidate_topics[i];
187 DCHECK(candidate_topic.IsValid());
189 if (candidate_topic.should_be_filtered()) {
// Count the candidate as real or fake depending on `is_true_topic()`.
192 candidate_topic.is_true_topic() ? real_count += 1 : fake_count += 1;
// Slot 0.
196 builder.SetCandidateTopic0(candidate_topic.topic().value())
197 .SetCandidateTopic0IsTrueTopTopic(candidate_topic.is_true_topic())
198 .SetCandidateTopic0ShouldBeFiltered(
199 candidate_topic.should_be_filtered())
200 .SetCandidateTopic0TaxonomyVersion(candidate_topic.taxonomy_version())
201 .SetCandidateTopic0ModelVersion(candidate_topic.model_version());
// Slot 1.
203 builder.SetCandidateTopic1(candidate_topic.topic().value())
204 .SetCandidateTopic1IsTrueTopTopic(candidate_topic.is_true_topic())
205 .SetCandidateTopic1ShouldBeFiltered(
206 candidate_topic.should_be_filtered())
207 .SetCandidateTopic1TaxonomyVersion(candidate_topic.taxonomy_version())
208 .SetCandidateTopic1ModelVersion(candidate_topic.model_version());
// Slot 2.
211 builder.SetCandidateTopic2(candidate_topic.topic().value())
212 .SetCandidateTopic2IsTrueTopTopic(candidate_topic.is_true_topic())
213 .SetCandidateTopic2ShouldBeFiltered(
214 candidate_topic.should_be_filtered())
215 .SetCandidateTopic2TaxonomyVersion(candidate_topic.taxonomy_version())
216 .SetCandidateTopic2ModelVersion(candidate_topic.model_version());
// Exclusive upper bound passed to the exact-linear histograms below.
// NOTE(review): the next line is the tail of a statement whose start is not
// visible; presumably it checks `kBuckets` against the number of exposed
// epochs.
220 const int kBuckets = 10;
222 blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());
224 base::UmaHistogramExactLinear("BrowsingTopics.Result.RealTopicCount",
225 real_count, kBuckets);
226 base::UmaHistogramExactLinear("BrowsingTopics.Result.FakeTopicCount",
227 fake_count, kBuckets);
228 base::UmaHistogramExactLinear("BrowsingTopics.Result.FilteredTopicCount",
229 filtered_count, kBuckets);
231 builder.Record(ukm_recorder->Get());
234 // Represents the action type of the request. Recorded to the
// "BrowsingTopics.ApiActionType" histogram.
236 // These values are persisted to logs. Entries should not be renumbered and
237 // numeric values should never be reused.
238 enum class BrowsingTopicsApiActionType {
239 // Get topics via document.browsingTopics({skipObservation: true}).
240 kGetViaDocumentApi = 0,
242 // Get and observe topics via the document.browsingTopics().
243 kGetAndObserveViaDocumentApi = 1,
245 // Get topics via fetch(<url>, {browsingTopics: true}) or via the analogous
// XHR request.
247 kGetViaFetchLikeApi = 2,
249 // Observe topics via the "Sec-Browsing-Topics: ?1" response header for the
250 // fetch(<url>, {browsingTopics: true}) request, or for the analogous XHR
// request.
252 kObserveViaFetchLikeApi = 3,
254 // Get topics via <iframe src=[url] browsingtopics>.
255 kGetViaIframeAttributeApi = 4,
257 // Observe topics via the "Sec-Browsing-Topics: ?1" response header for the
258 // <iframe src=[url] browsingtopics> request.
259 kObserveViaIframeAttributeApi = 5,
// Must always equal the largest enumerator above.
261 kMaxValue = kObserveViaIframeAttributeApi,
// Records one `BrowsingTopicsApiActionType` sample to the
// "BrowsingTopics.ApiActionType" histogram, chosen from `caller_source`.
// NOTE(review): the remaining parameters and the inner conditions that pick
// between the "get" and "observe" variants are not visible in this view; the
// call site passes `get_topics` and `observe` after `caller_source`.
264 void RecordBrowsingTopicsApiActionTypeMetrics(ApiCallerSource caller_source,
267 static constexpr char kBrowsingTopicsApiActionTypeHistogramId[] =
268 "BrowsingTopics.ApiActionType";
// document.browsingTopics() callers.
270 if (caller_source == ApiCallerSource::kJavaScript) {
274 base::UmaHistogramEnumeration(
275 kBrowsingTopicsApiActionTypeHistogramId,
276 BrowsingTopicsApiActionType::kGetViaDocumentApi);
280 base::UmaHistogramEnumeration(
281 kBrowsingTopicsApiActionTypeHistogramId,
282 BrowsingTopicsApiActionType::kGetAndObserveViaDocumentApi);
// <iframe browsingtopics> callers.
287 if (caller_source == ApiCallerSource::kIframeAttribute) {
291 base::UmaHistogramEnumeration(
292 kBrowsingTopicsApiActionTypeHistogramId,
293 BrowsingTopicsApiActionType::kGetViaIframeAttributeApi);
298 base::UmaHistogramEnumeration(
299 kBrowsingTopicsApiActionTypeHistogramId,
300 BrowsingTopicsApiActionType::kObserveViaIframeAttributeApi);
// The only remaining caller source is fetch.
305 DCHECK_EQ(caller_source, ApiCallerSource::kFetch);
310 base::UmaHistogramEnumeration(
311 kBrowsingTopicsApiActionTypeHistogramId,
312 BrowsingTopicsApiActionType::kGetViaFetchLikeApi);
317 base::UmaHistogramEnumeration(
318 kBrowsingTopicsApiActionTypeHistogramId,
319 BrowsingTopicsApiActionType::kObserveViaFetchLikeApi);
// Returns the union of the hashed domains attached to every topic entry of
// every epoch in `browsing_topics_state`. Using a std::set removes duplicates.
322 std::set<HashedDomain> GetAllObservingDomains(
323 const BrowsingTopicsState& browsing_topics_state) {
324 std::set<HashedDomain> observing_domains;
325 for (const EpochTopics& epoch : browsing_topics_state.epochs()) {
326 for (const auto& topic_and_domains :
327 epoch.top_topics_and_observing_domains()) {
328 observing_domains.insert(topic_and_domains.hashed_domains().begin(),
329 topic_and_domains.hashed_domains().end());
332 return observing_domains;
// Defaulted destructor; members are destroyed by their own destructors.
337 BrowsingTopicsServiceImpl::~BrowsingTopicsServiceImpl() = default;
// Stores the raw dependency pointers, takes ownership of `annotator`, starts
// observing `privacy_sandbox_settings` and `history_service`, and constructs
// `browsing_topics_state_` with `OnBrowsingTopicsStateLoaded` as its callback.
// `topic_accessed_callback` must be non-null.
// NOTE(review): the first arguments of the `browsing_topics_state_`
// initializer are not visible in this view.
339 BrowsingTopicsServiceImpl::BrowsingTopicsServiceImpl(
340 const base::FilePath& profile_path,
341 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
342 history::HistoryService* history_service,
343 content::BrowsingTopicsSiteDataManager* site_data_manager,
344 std::unique_ptr<Annotator> annotator,
345 TopicAccessedCallback topic_accessed_callback)
346 : privacy_sandbox_settings_(privacy_sandbox_settings),
347 history_service_(history_service),
348 site_data_manager_(site_data_manager),
349 browsing_topics_state_(
// The callback is bound with base::Unretained(this); `browsing_topics_state_`
// is a member of `this`.
352 &BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded,
353 base::Unretained(this))),
354 annotator_(std::move(annotator)),
355 topic_accessed_callback_(std::move(topic_accessed_callback)) {
356 DCHECK(topic_accessed_callback_);
357 privacy_sandbox_settings_observation_.Observe(privacy_sandbox_settings);
358 history_service_observation_.Observe(history_service);
// Handles a Topics API request from `context_origin` on the page of
// `main_frame`. Records action/result metrics, checks that the state is loaded
// and that settings allow access, tracks the API usage on the page, and fills
// `topics` (which must be empty on entry) with the sorted, de-duplicated
// topics to return.
// NOTE(review): two parameters (`get_topics`, `observe`, as used below), the
// return statements, and some branch bodies are not visible in this view, so
// the meaning of the bool return value cannot be confirmed from here.
361 bool BrowsingTopicsServiceImpl::HandleTopicsWebApi(
362 const url::Origin& context_origin,
363 content::RenderFrameHost* main_frame,
364 ApiCallerSource caller_source,
367 std::vector<blink::mojom::EpochTopicPtr>& topics) {
368 DCHECK(topics.empty());
// At least one of getting or observing must be requested.
369 DCHECK(get_topics || observe);
371 RecordBrowsingTopicsApiActionTypeMetrics(caller_source, get_topics, observe);
// Failure: persisted state has not finished loading.
373 if (!browsing_topics_state_loaded_) {
374 RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kStateNotReady,
375 main_frame, get_topics);
// Failure: Topics is disallowed globally by settings.
379 if (!privacy_sandbox_settings_->IsTopicsAllowed()) {
380 RecordBrowsingTopicsApiResultMetrics(
381 ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
// Failure: Topics is disallowed for this top-frame / context pair.
385 if (!privacy_sandbox_settings_->IsTopicsAllowedForContext(
386 /*top_frame_origin=*/main_frame->GetLastCommittedOrigin(),
387 context_origin.GetURL(), main_frame)) {
388 RecordBrowsingTopicsApiResultMetrics(
389 ApiAccessResult::kAccessDisallowedBySettings, main_frame, get_topics);
393 RecordBrowsingTopicsApiResultMetrics(ApiAccessResult::kSuccess, main_frame,
// The context domain is the domain-and-registry of the calling origin
// (private registries included), hashed with the state's HMAC key.
396 std::string context_domain =
397 net::registry_controlled_domains::GetDomainAndRegistry(
398 context_origin.GetURL(),
399 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
401 HashedDomain hashed_context_domain = HashContextDomainForStorage(
402 browsing_topics_state_.hmac_key(), context_domain);
405 // Track the API usage context after the permissions check.
406 BrowsingTopicsPageLoadDataTracker::GetOrCreateForPage(main_frame->GetPage())
407 ->OnBrowsingTopicsApiUsed(hashed_context_domain, context_domain,
// The top domain is derived the same way from the main frame's origin.
415 std::string top_domain =
416 net::registry_controlled_domains::GetDomainAndRegistry(
417 main_frame->GetLastCommittedOrigin().GetURL(),
418 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
420 std::vector<CandidateTopic> valid_candidate_topics;
// Collect one candidate topic per epoch applicable to `top_domain`.
422 for (const EpochTopics* epoch :
423 browsing_topics_state_.EpochsForSite(top_domain)) {
424 CandidateTopic candidate_topic = epoch->CandidateTopicForSite(
425 top_domain, hashed_context_domain, browsing_topics_state_.hmac_key());
427 if (!candidate_topic.IsValid()) {
431 // Although a top topic can never be in the disallowed state, the returned
432 // `candidate_topic` may be the random one. Thus we still need this check.
433 if (!privacy_sandbox_settings_->IsTopicAllowed(
434 privacy_sandbox::CanonicalTopic(
435 candidate_topic.topic(), candidate_topic.taxonomy_version()))) {
436 DCHECK(!candidate_topic.is_true_topic());
440 valid_candidate_topics.push_back(std::move(candidate_topic));
443 RecordBrowsingTopicsApiResultMetrics(valid_candidate_topics, main_frame);
// Convert the candidates into mojom results.
445 for (const CandidateTopic& candidate_topic : valid_candidate_topics) {
446 if (candidate_topic.should_be_filtered()) {
450 // `PageSpecificContentSettings` should only observe true top topics
451 // accessed on the page. It's okay to notify the same topic multiple
452 // times even though duplicate topics will be removed in the end.
453 if (candidate_topic.is_true_topic()) {
454 privacy_sandbox::CanonicalTopic canonical_topic(
455 candidate_topic.topic(), candidate_topic.taxonomy_version());
456 topic_accessed_callback_.Run(main_frame, context_origin,
457 /*blocked_by_policy=*/false,
// `version` has the form "<config_version>:<taxonomy_version>:<model_version>",
// where `config_version` is "chrome.<CurrentConfigVersion()>".
461 auto result_topic = blink::mojom::EpochTopic::New();
462 result_topic->topic = candidate_topic.topic().value();
463 result_topic->config_version =
464 base::StrCat({"chrome.", base::NumberToString(CurrentConfigVersion())});
465 result_topic->model_version =
466 base::NumberToString(candidate_topic.model_version());
467 result_topic->taxonomy_version =
468 base::NumberToString(candidate_topic.taxonomy_version());
469 result_topic->version = base::StrCat({result_topic->config_version, ":",
470 result_topic->taxonomy_version, ":",
471 result_topic->model_version});
472 topics.emplace_back(std::move(result_topic));
475 // Sort result based on the version first, and then based on the topic ID.
476 // This groups the topics with the same version together, so that when
477 // transforming into the header format, all duplicate versions can be omitted.
478 std::sort(topics.begin(), topics.end(),
479 [](const blink::mojom::EpochTopicPtr& left,
480 const blink::mojom::EpochTopicPtr& right) {
481 if (left->version != right->version) {
482 return left->version < right->version;
485 return left->topic < right->topic;
488 // Remove duplicate entries. The sort above makes equal entries adjacent,
// which std::unique requires.
489 topics.erase(std::unique(topics.begin(), topics.end()), topics.end());
// Returns the number of distinct (taxonomy version, model version) pairs among
// the epochs applicable to the site of `main_frame_origin`, counting only
// epochs whose versions are valid.
// Requires that the state has been loaded and that Topics is allowed by
// settings (both are CHECKed).
494 int BrowsingTopicsServiceImpl::NumVersionsInEpochs(
495 const url::Origin& main_frame_origin) const {
496 CHECK(browsing_topics_state_loaded_);
497 CHECK(privacy_sandbox_settings_->IsTopicsAllowed());
499 std::string main_frame_domain =
500 net::registry_controlled_domains::GetDomainAndRegistry(
501 main_frame_origin.GetURL(),
502 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
// A set keeps each version pair once.
504 std::set<std::pair<int, int64_t>> distinct_versions;
505 for (const EpochTopics* epoch :
506 browsing_topics_state_.EpochsForSite(main_frame_domain)) {
507 if (epoch->HasValidVersions()) {
508 distinct_versions.emplace(epoch->taxonomy_version(),
509 epoch->model_version());
513 return distinct_versions.size();
// Produces the topics state for the internals Web UI and delivers it through
// `callback`. If the state has not loaded, `callback` is run with an override
// status message. If a calculation is in progress, `callback` is queued until
// it completes.
// NOTE(review): the first parameter (referred to as `calculate_now` in the
// comment below), the condition guarding the manual-calculation path, and the
// early returns are not visible in this view.
516 void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUi(
518 mojom::PageHandler::GetBrowsingTopicsStateCallback callback) {
519 if (!browsing_topics_state_loaded_) {
520 std::move(callback).Run(
521 mojom::WebUIGetBrowsingTopicsStateResult::NewOverrideStatusMessage(
522 "State loading hasn't finished. Please retry shortly."));
526 // If a calculation is already in progress, get the webui topics state after
527 // the calculation is done. Do this regardless of whether `calculate_now` is
528 // true, i.e. if `calculate_now` is true, this request is effectively merged
529 // with the in progress calculation.
530 if (topics_calculator_) {
531 get_state_for_webui_callbacks_.push_back(std::move(callback));
// No calculation is in progress, so one must be scheduled on the timer.
535 DCHECK(schedule_calculate_timer_.IsRunning());
// Manual calculation path: queue the callback, cancel the scheduled run, and
// calculate now.
538 get_state_for_webui_callbacks_.push_back(std::move(callback));
539 schedule_calculate_timer_.AbandonAndStop();
540 CalculateBrowsingTopics(/*is_manually_triggered=*/true);
// Otherwise resolve the hashed observing domains, then build the Web UI state
// in `GetBrowsingTopicsStateForWebUiHelper`. The helper is bound to a weak
// pointer.
544 site_data_manager_->GetContextDomainsFromHashedContextDomains(
545 GetAllObservingDomains(browsing_topics_state_),
547 &BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper,
548 weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
// Returns the valid top topics of all epochs as canonical topics. Only entries
// before `padded_top_topics_start_index()` are considered.
// NOTE(review): the early-return for the not-loaded case, the skip for invalid
// entries, and the final return are not visible in this view.
551 std::vector<privacy_sandbox::CanonicalTopic>
552 BrowsingTopicsServiceImpl::GetTopTopicsForDisplay() const {
553 if (!browsing_topics_state_loaded_) {
557 std::vector<privacy_sandbox::CanonicalTopic> result;
559 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
560 DCHECK_LE(epoch.padded_top_topics_start_index(),
561 epoch.top_topics_and_observing_domains().size());
563 for (size_t i = 0; i < epoch.padded_top_topics_start_index(); ++i) {
564 const TopicAndDomains& topic_and_domains =
565 epoch.top_topics_and_observing_domains()[i];
567 if (!topic_and_domains.IsValid()) {
571 // A top topic can never be in the disallowed state (i.e. it will be
572 // cleared when it becomes disallowed).
573 DCHECK(privacy_sandbox_settings_->IsTopicAllowed(
574 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
575 epoch.taxonomy_version())));
577 result.emplace_back(topic_and_domains.topic(), epoch.taxonomy_version());
// Returns a non-owning pointer to the annotator held in `annotator_`.
584 Annotator* BrowsingTopicsServiceImpl::GetAnnotator() {
585 return annotator_.get();
// Clears `canonical_topic` from the stored state, keyed by its topic id only.
// NOTE(review): the body of the not-loaded guard is not visible in this view.
588 void BrowsingTopicsServiceImpl::ClearTopic(
589 const privacy_sandbox::CanonicalTopic& canonical_topic) {
590 if (!browsing_topics_state_loaded_) {
594 browsing_topics_state_.ClearTopic(canonical_topic.topic_id());
// Clears the data associated with the context domain of `origin` from both the
// in-memory topics state and the site data manager.
// NOTE(review): the body of the not-loaded guard and the URL argument of
// GetDomainAndRegistry are not visible in this view.
597 void BrowsingTopicsServiceImpl::ClearTopicsDataForOrigin(
598 const url::Origin& origin) {
599 if (!browsing_topics_state_loaded_) {
603 std::string context_domain =
604 net::registry_controlled_domains::GetDomainAndRegistry(
606 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
// Storage is keyed by the domain hashed with the state's HMAC key.
608 HashedDomain hashed_context_domain = HashContextDomainForStorage(
609 browsing_topics_state_.hmac_key(), context_domain);
611 browsing_topics_state_.ClearContextDomain(hashed_context_domain);
612 site_data_manager_->ClearContextDomain(hashed_context_domain);
// Clears all topics from the state and expires all site data recorded before
// the current time.
// NOTE(review): the body of the not-loaded guard is not visible in this view.
615 void BrowsingTopicsServiceImpl::ClearAllTopicsData() {
616 if (!browsing_topics_state_loaded_) {
620 browsing_topics_state_.ClearAllTopics();
621 site_data_manager_->ExpireDataBefore(base::Time::Now());
// Factory for the topics calculator: forwards every argument unchanged to the
// `BrowsingTopicsCalculator` constructor and returns the owning pointer.
624 std::unique_ptr<BrowsingTopicsCalculator>
625 BrowsingTopicsServiceImpl::CreateCalculator(
626 privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings,
627 history::HistoryService* history_service,
628 content::BrowsingTopicsSiteDataManager* site_data_manager,
629 Annotator* annotator,
630 const base::circular_deque<EpochTopics>& epochs,
631 bool is_manually_triggered,
632 BrowsingTopicsCalculator::CalculateCompletedCallback callback) {
633 return std::make_unique<BrowsingTopicsCalculator>(
634 privacy_sandbox_settings, history_service, site_data_manager, annotator,
635 epochs, is_manually_triggered, std::move(callback));
// Read-only accessor for the owned `browsing_topics_state_`.
638 const BrowsingTopicsState& BrowsingTopicsServiceImpl::browsing_topics_state() {
639 return browsing_topics_state_;
// Starts `schedule_calculate_timer_` so that a non-manual
// `CalculateBrowsingTopics` run is triggered when it fires. Requires the state
// to be loaded.
// NOTE(review): the first arguments of `Start()` are not visible in this view;
// presumably `delay` is passed there.
642 void BrowsingTopicsServiceImpl::ScheduleBrowsingTopicsCalculation(
643 base::TimeDelta delay) {
644 DCHECK(browsing_topics_state_loaded_);
646 // `this` owns the timer, which is automatically cancelled on destruction, so
647 // base::Unretained(this) is safe.
648 schedule_calculate_timer_.Start(
650 base::BindOnce(&BrowsingTopicsServiceImpl::CalculateBrowsingTopics,
651 base::Unretained(this), /*is_manually_triggered=*/false));
// Creates the calculator for a new epoch and stores it in `topics_calculator_`.
// Requires the state to be loaded and no calculator to exist already. The
// completion callback is `OnCalculateBrowsingTopicsCompleted`.
// NOTE(review): the line that wraps the completion callback (presumably a
// bind call) is not visible in this view.
654 void BrowsingTopicsServiceImpl::CalculateBrowsingTopics(
655 bool is_manually_triggered) {
656 DCHECK(browsing_topics_state_loaded_);
658 DCHECK(!topics_calculator_);
660 // `this` owns `topics_calculator_` so `topics_calculator_` should not invoke
661 // the callback once it's destroyed.
662 topics_calculator_ = CreateCalculator(
663 privacy_sandbox_settings_, history_service_, site_data_manager_,
664 annotator_.get(), browsing_topics_state_.epochs(), is_manually_triggered,
666 &BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted,
667 base::Unretained(this)));
// Completion handler for a topics calculation. Destroys the calculator,
// records the time since the previous calculation, appends `epoch_topics` to
// the state, expires site data that is no longer needed, schedules the next
// calculation one epoch period from now, and serves any queued Web UI
// callbacks.
670 void BrowsingTopicsServiceImpl::OnCalculateBrowsingTopicsCompleted(
671 EpochTopics epoch_topics) {
672 DCHECK(browsing_topics_state_loaded_);
674 DCHECK(topics_calculator_);
675 topics_calculator_.reset();
// The histogram needs a previous epoch to measure against.
677 if (!browsing_topics_state_.epochs().empty()) {
678 // Use 24 days as the max value, because 24 days is the maximum number of
679 // days that works with UmaHistogramCustomTimes due to its conversion of
680 // times into milliseconds. We expect most values to be around
681 // `kBrowsingTopicsTimePeriodPerEpoch`.
682 base::UmaHistogramCustomTimes(
683 "BrowsingTopics.EpochTopicsCalculation.TimeBetweenCalculations",
684 epoch_topics.calculation_time() -
685 browsing_topics_state_.epochs().back().calculation_time(),
686 /*min=*/base::Seconds(1), /*max=*/base::Days(24), /*buckets=*/100);
// `AddEpoch` may return an epoch that it removed from the state. If so, expire
// site data older than that epoch's calculation time minus an offset.
// NOTE(review): parts of the offset expression are not visible in this view;
// it involves the number of epochs of observation data used for filtering and
// the epoch period.
689 absl::optional<EpochTopics> maybe_removed_epoch =
690 browsing_topics_state_.AddEpoch(std::move(epoch_topics));
691 if (maybe_removed_epoch.has_value()) {
692 site_data_manager_->ExpireDataBefore(
693 maybe_removed_epoch->calculation_time() -
695 kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering
697 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
699 browsing_topics_state_.UpdateNextScheduledCalculationTime();
701 ScheduleBrowsingTopicsCalculation(
702 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
// Serve the Web UI requests that were queued while the calculation ran. Each
// callback is moved out, and the list is cleared afterwards.
704 for (auto& callback : get_state_for_webui_callbacks_) {
705 site_data_manager_->GetContextDomainsFromHashedContextDomains(
706 GetAllObservingDomains(browsing_topics_state_),
708 &BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper,
709 weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
711 get_state_for_webui_callbacks_.clear();
// Called once the persisted state has loaded. Marks the state as loaded,
// applies the startup decision (clear everything, or clear individual
// disallowed topics), expires site data older than the accessible-since time,
// and schedules the next calculation.
714 void BrowsingTopicsServiceImpl::OnBrowsingTopicsStateLoaded() {
715 DCHECK(!browsing_topics_state_loaded_);
716 browsing_topics_state_loaded_ = true;
// NOTE(review): the local name is misspelled ("sccessible" for "accessible").
// It is left as-is here because renaming it is a code change.
718 base::Time browsing_topics_data_sccessible_since =
719 privacy_sandbox_settings_->TopicsDataAccessibleSince();
// NOTE(review): the line that wraps `IsTopicAllowed` into a callback
// (presumably a bind call) is not visible in this view.
721 StartupCalculateDecision decision = GetStartupCalculationDecision(
722 browsing_topics_state_, browsing_topics_data_sccessible_since,
724 &privacy_sandbox::PrivacySandboxSettings::IsTopicAllowed,
725 base::Unretained(privacy_sandbox_settings_)));
727 if (decision.clear_all_topics_data) {
728 browsing_topics_state_.ClearAllTopics();
729 } else if (!decision.topics_to_clear.empty()) {
730 for (const privacy_sandbox::CanonicalTopic& canonical_topic :
731 decision.topics_to_clear) {
732 browsing_topics_state_.ClearTopic(canonical_topic.topic_id());
736 site_data_manager_->ExpireDataBefore(browsing_topics_data_sccessible_since);
738 ScheduleBrowsingTopicsCalculation(decision.next_calculation_delay);
// Stops observing the privacy sandbox settings and the history service.
741 void BrowsingTopicsServiceImpl::Shutdown() {
742 privacy_sandbox_settings_observation_.Reset();
743 history_service_observation_.Reset();
// Reacts to a change of the "topics data accessible since" time: clears all
// topics, expires site data older than the new time, and restarts any
// calculation that is in progress.
// NOTE(review): the body of the not-loaded guard is not visible in this view.
746 void BrowsingTopicsServiceImpl::OnTopicsDataAccessibleSinceUpdated() {
747 if (!browsing_topics_state_loaded_) {
751 // Here we rely on the fact that `browsing_topics_data_accessible_since` can
752 // only be updated to base::Time::Now() due to data deletion. In this case, we
753 // should just clear all topics.
754 browsing_topics_state_.ClearAllTopics();
755 site_data_manager_->ExpireDataBefore(
756 privacy_sandbox_settings_->TopicsDataAccessibleSince());
758 // Abort the outstanding topics calculation and restart immediately.
759 if (topics_calculator_) {
760 DCHECK(!schedule_calculate_timer_.IsRunning());
// Read the flag before destroying the calculator so the restart keeps it.
762 bool is_manually_triggered = topics_calculator_->is_manually_triggered();
763 topics_calculator_.reset();
764 CalculateBrowsingTopics(is_manually_triggered);
// History deletion observer. Clears every non-empty epoch whose presumed
// history window overlaps the deleted time range, then restarts any
// calculation that is in progress.
// NOTE(review): the bodies of the guards below (not loaded, invalid time
// range, empty epoch) are not visible in this view.
768 void BrowsingTopicsServiceImpl::OnURLsDeleted(
769 history::HistoryService* history_service,
770 const history::DeletionInfo& deletion_info) {
771 if (!browsing_topics_state_loaded_) {
775 // Ignore invalid time_range.
776 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid()) {
// Epochs are addressed by index because `ClearOneEpoch` takes one.
780 for (size_t i = 0; i < browsing_topics_state_.epochs().size(); ++i) {
781 const EpochTopics& epoch_topics = browsing_topics_state_.epochs()[i];
783 if (epoch_topics.empty()) {
787 // The typical case is assumed here. We cannot always derive the original
788 // history start time, as the necessary data (e.g. its previous epoch's
789 // calculation time) may have been gone.
790 base::Time history_data_start_time =
791 epoch_topics.calculation_time() -
792 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
// The windows [history_data_start_time, calculation_time] and
// [time_range.begin, time_range.end] overlap when each starts no later than
// the other ends.
794 bool time_range_overlap =
795 epoch_topics.calculation_time() >= deletion_info.time_range().begin() &&
796 history_data_start_time <= deletion_info.time_range().end();
798 if (time_range_overlap) {
799 browsing_topics_state_.ClearOneEpoch(i);
803 // If there's an outstanding topics calculation, abort and restart it.
804 if (topics_calculator_) {
805 DCHECK(!schedule_calculate_timer_.IsRunning());
// Read the flag before destroying the calculator so the restart keeps it.
807 bool is_manually_triggered = topics_calculator_->is_manually_triggered();
808 topics_calculator_.reset();
809 CalculateBrowsingTopics(is_manually_triggered);
// Builds the `mojom::WebUIBrowsingTopicsState` from the current state and runs
// `callback` with it. `hashed_to_unhashed_context_domains` maps hashed
// observing domains back to their original strings, where known.
// Requires the state to be loaded and no calculation to be in progress.
813 void BrowsingTopicsServiceImpl::GetBrowsingTopicsStateForWebUiHelper(
814 mojom::PageHandler::GetBrowsingTopicsStateCallback callback,
815 std::map<HashedDomain, std::string> hashed_to_unhashed_context_domains) {
816 DCHECK(browsing_topics_state_loaded_);
817 DCHECK(!topics_calculator_);
819 auto webui_state = mojom::WebUIBrowsingTopicsState::New();
821 webui_state->next_scheduled_calculation_time =
822 browsing_topics_state_.next_scheduled_calculation_time();
824 for (const EpochTopics& epoch : browsing_topics_state_.epochs()) {
825 DCHECK_LE(epoch.padded_top_topics_start_index(),
826 epoch.top_topics_and_observing_domains().size());
828 // Note: for a failed epoch calculation, the default zero-initialized values
829 // will be displayed in the Web UI.
830 auto webui_epoch = mojom::WebUIEpoch::New();
831 webui_epoch->calculation_time = epoch.calculation_time();
832 webui_epoch->model_version = base::NumberToString(epoch.model_version());
833 webui_epoch->taxonomy_version =
834 base::NumberToString(epoch.taxonomy_version());
// An index loop is used because `i` decides `is_real_topic` below.
836 for (size_t i = 0; i < epoch.top_topics_and_observing_domains().size();
838 const TopicAndDomains& topic_and_domains =
839 epoch.top_topics_and_observing_domains()[i];
841 privacy_sandbox::CanonicalTopic canonical_topic =
842 privacy_sandbox::CanonicalTopic(topic_and_domains.topic(),
843 epoch.taxonomy_version());
// Translate each hashed observing domain into a display string.
845 std::vector<std::string> webui_observed_by_domains;
846 webui_observed_by_domains.reserve(
847 topic_and_domains.hashed_domains().size());
848 for (const HashedDomain& hashed_domain :
849 topic_and_domains.hashed_domains()) {
850 auto it = hashed_to_unhashed_context_domains.find(hashed_domain);
851 if (it != hashed_to_unhashed_context_domains.end()) {
852 webui_observed_by_domains.push_back(it->second);
854 // Default to the hashed value if we don't have the original.
855 webui_observed_by_domains.push_back(
856 base::NumberToString(hashed_domain.value()));
860 // Note: if the topic is invalid (i.e. cleared), the output `topic_id`
861 // will be 0; if the topic is invalid, or if the taxonomy version isn't
862 // recognized by this Chrome binary, the output `topic_name` will be
// a placeholder (NOTE(review): the end of this sentence is not visible in
// this view; confirm the exact placeholder text).
864 auto webui_topic = mojom::WebUITopic::New();
865 webui_topic->topic_id = topic_and_domains.topic().value();
866 webui_topic->topic_name = canonical_topic.GetLocalizedRepresentation();
// Entries at or after `padded_top_topics_start_index()` are not real topics.
867 webui_topic->is_real_topic = (i < epoch.padded_top_topics_start_index());
868 webui_topic->observed_by_domains = std::move(webui_observed_by_domains);
870 webui_epoch->topics.push_back(std::move(webui_topic));
873 webui_state->epochs.push_back(std::move(webui_epoch));
876 // Reorder the epochs from latest to oldest.
877 base::ranges::reverse(webui_state->epochs);
879 std::move(callback).Run(
880 mojom::WebUIGetBrowsingTopicsStateResult::NewBrowsingTopicsState(
881 std::move(webui_state)));
884 } // namespace browsing_topics