1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/browsing_topics/browsing_topics_state.h"
7 #include "base/base64.h"
8 #include "base/files/file_path.h"
9 #include "base/files/file_util.h"
10 #include "base/json/json_file_value_serializer.h"
11 #include "base/json/json_writer.h"
12 #include "base/json/values_util.h"
13 #include "base/metrics/histogram_functions.h"
14 #include "base/task/task_traits.h"
15 #include "base/task/thread_pool.h"
16 #include "components/browsing_topics/common/common_types.h"
17 #include "components/browsing_topics/util.h"
18 #include "third_party/blink/public/common/features.h"
20 namespace browsing_topics {
24 // How often the file is saved at most.
25 const base::TimeDelta kSaveDelay = base::Milliseconds(2500);
27 const char kEpochsNameKey[] = "epochs";
28 const char kNextScheduledCalculationTimeNameKey[] =
29 "next_scheduled_calculation_time";
30 const char kHexEncodedHmacKeyNameKey[] = "hex_encoded_hmac_key";
32 // `config_version` is a deprecated key. Do not reuse.
34 std::unique_ptr<BrowsingTopicsState::LoadResult> LoadFileOnBackendTaskRunner(
35 const base::FilePath& file_path) {
36 bool file_exists = base::PathExists(file_path);
39 return std::make_unique<BrowsingTopicsState::LoadResult>(
40 /*file_exists=*/false, nullptr);
43 JSONFileValueDeserializer deserializer(file_path);
44 std::unique_ptr<base::Value> value = deserializer.Deserialize(
45 /*error_code=*/nullptr,
46 /*error_message=*/nullptr);
48 return std::make_unique<BrowsingTopicsState::LoadResult>(/*file_exists=*/true,
52 bool AreConfigVersionsCompatible(int preexisting, int current) {
53 // The config version can be 0 for a failed topics calculation.
54 CHECK_GE(preexisting, 0);
56 CHECK_LE(current, ConfigVersion::kMaxValue);
58 // This could happen in rare case when Chrome rolls back to an earlier
60 if (preexisting > ConfigVersion::kMaxValue) {
64 // Epoch from a failed calculation is compatible with any version.
65 if (preexisting == 0) {
69 if (preexisting == current) {
73 if ((preexisting == ConfigVersion::kInitial &&
74 current == ConfigVersion::kUsePrioritizedTopicsList) ||
75 (preexisting == ConfigVersion::kUsePrioritizedTopicsList &&
76 current == ConfigVersion::kInitial)) {
77 // Versions 1 and 2 are forward and backward compatible.
85 BrowsingTopicsState::LoadResult::LoadResult(bool file_exists,
86 std::unique_ptr<base::Value> value)
87 : file_exists(file_exists), value(std::move(value)) {}
89 BrowsingTopicsState::LoadResult::~LoadResult() = default;
91 BrowsingTopicsState::BrowsingTopicsState(const base::FilePath& profile_path,
92 base::OnceClosure loaded_callback)
93 : backend_task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
94 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
95 base::TaskShutdownBehavior::BLOCK_SHUTDOWN})),
96 writer_(profile_path.Append(FILE_PATH_LITERAL("BrowsingTopicsState")),
99 /*histogram_suffix=*/"BrowsingTopicsState") {
100 backend_task_runner_->PostTaskAndReplyWithResult(
101 FROM_HERE, base::BindOnce(&LoadFileOnBackendTaskRunner, writer_.path()),
102 base::BindOnce(&BrowsingTopicsState::DidLoadFile,
103 weak_ptr_factory_.GetWeakPtr(),
104 std::move(loaded_callback)));
107 BrowsingTopicsState::~BrowsingTopicsState() {
108 if (writer_.HasPendingWrite()) {
109 writer_.DoScheduledWrite();
113 void BrowsingTopicsState::ClearAllTopics() {
116 if (!epochs_.empty()) {
122 void BrowsingTopicsState::ClearOneEpoch(size_t epoch_index) {
125 epochs_[epoch_index].ClearTopics();
129 void BrowsingTopicsState::ClearTopic(Topic topic) {
130 for (EpochTopics& epoch : epochs_) {
131 epoch.ClearTopic(topic);
137 void BrowsingTopicsState::ClearContextDomain(
138 const HashedDomain& hashed_context_domain) {
139 for (EpochTopics& epoch : epochs_) {
140 epoch.ClearContextDomain(hashed_context_domain);
146 absl::optional<EpochTopics> BrowsingTopicsState::AddEpoch(
147 EpochTopics epoch_topics) {
150 epochs_.push_back(std::move(epoch_topics));
152 // Remove the epoch data that is no longer useful.
153 absl::optional<EpochTopics> removed_epoch_topics;
156 blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get()) +
158 removed_epoch_topics = std::move(epochs_[0]);
163 return removed_epoch_topics;
166 void BrowsingTopicsState::UpdateNextScheduledCalculationTime() {
169 next_scheduled_calculation_time_ =
171 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get();
176 std::vector<const EpochTopics*> BrowsingTopicsState::EpochsForSite(
177 const std::string& top_domain) const {
180 const size_t kNumberOfEpochsToExpose = static_cast<size_t>(
181 blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());
183 DCHECK_GT(kNumberOfEpochsToExpose, 0u);
185 // Derive a per-user per-site time delta in the range of
186 // [0, `kBrowsingTopicsMaxEpochIntroductionDelay`). The latest epoch will only
187 // be used after `site_sticky_time_delta` has elapsed since the last
188 // calculation finish time (i.e. `next_scheduled_calculation_time_` -
189 // `kBrowsingTopicsTimePeriodPerEpoch`). This way, each site will see a
190 // different epoch switch time.
191 base::TimeDelta site_sticky_time_delta =
192 CalculateSiteStickyTimeDelta(top_domain);
194 size_t end_epoch_index = 0;
195 if (base::Time::Now() <=
196 next_scheduled_calculation_time_ -
197 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get() +
198 site_sticky_time_delta) {
199 if (epochs_.size() < 2) {
203 end_epoch_index = epochs_.size() - 2;
205 if (epochs_.empty()) {
209 end_epoch_index = epochs_.size() - 1;
212 size_t start_epoch_index = (end_epoch_index + 1 >= kNumberOfEpochsToExpose)
213 ? end_epoch_index + 1 - kNumberOfEpochsToExpose
216 std::vector<const EpochTopics*> result;
218 for (size_t i = start_epoch_index; i <= end_epoch_index; ++i) {
219 result.emplace_back(&epochs_[i]);
225 bool BrowsingTopicsState::HasScheduledSaveForTesting() const {
226 return writer_.HasPendingWrite();
229 base::TimeDelta BrowsingTopicsState::CalculateSiteStickyTimeDelta(
230 const std::string& top_domain) const {
231 uint64_t epoch_switch_time_decision_hash =
232 HashTopDomainForEpochSwitchTimeDecision(hmac_key_, top_domain);
234 // Currently the browser can only reasonably support configurations where the
235 // random-over period is less or equal to an epoch, because 1) we only store
236 // one more epoch in addition to the number to expose to sites, and that would
237 // not be sufficient. 2) the calculation finish times (i.e. the actual epoch
238 // delimitation times) for previous epochs aren't stored, so we wouldn't be
239 // able to know when to use a previous epoch (or we'd need to approximate
240 // the delimitation time with the calculation start time, or based on its
241 // position in `epochs_`).
242 DCHECK_LE(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get(),
243 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
245 DCHECK_GT(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
249 // If the latest epoch was manually triggered, make the latest epoch
250 // immediately available for testing purposes.
251 if (!epochs_.empty() &&
252 epochs_.back().from_manually_triggered_calculation()) {
253 return base::Seconds(0);
256 return base::Seconds(
257 epoch_switch_time_decision_hash %
258 blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
262 base::ImportantFileWriter::BackgroundDataProducerCallback
263 BrowsingTopicsState::GetSerializedDataProducerForBackgroundSequence() {
266 return base::BindOnce(
267 [](base::Value value) -> absl::optional<std::string> {
268 // This runs on the background sequence.
270 if (!base::JSONWriter::WriteWithOptions(
271 value, base::JSONWriter::OPTIONS_PRETTY_PRINT, &output)) {
272 return absl::nullopt;
276 base::Value(ToDictValue()));
279 base::Value::Dict BrowsingTopicsState::ToDictValue() const {
282 base::Value::List epochs_list;
283 for (const EpochTopics& epoch : epochs_) {
284 epochs_list.Append(epoch.ToDictValue());
287 base::Value::Dict result_dict;
288 result_dict.Set(kEpochsNameKey, std::move(epochs_list));
290 result_dict.Set(kNextScheduledCalculationTimeNameKey,
291 base::TimeToValue(next_scheduled_calculation_time_));
293 std::string hex_encoded_hmac_key = base::HexEncode(hmac_key_);
294 result_dict.Set(kHexEncodedHmacKeyNameKey, base::HexEncode(hmac_key_));
299 void BrowsingTopicsState::ScheduleSave() {
301 writer_.ScheduleWriteWithBackgroundDataSerializer(this);
304 void BrowsingTopicsState::DidLoadFile(base::OnceClosure loaded_callback,
305 std::unique_ptr<LoadResult> load_result) {
309 bool success = false;
310 bool should_save_state_to_file = false;
312 if (!load_result->file_exists) {
313 // If this is the first time loading, generate a `hmac_key_`, and save it.
314 // This ensures we only generate the key once per profile, as data derived
315 // from the key may be subsequently stored elsewhere.
316 hmac_key_ = GenerateRandomHmacKey();
318 should_save_state_to_file = true;
319 } else if (!load_result->value) {
320 // If a file read error was encountered, or if the JSON deserialization
321 // failed in general, empty the file.
322 should_save_state_to_file = true;
324 // JSON deserialization succeeded in general. Parse the value to individual
326 ParseResult parse_result = ParseValue(*(load_result->value));
328 success = parse_result.success;
329 should_save_state_to_file = parse_result.should_save_state_to_file;
332 base::UmaHistogramBoolean(
333 "BrowsingTopics.BrowsingTopicsState.LoadFinishStatus", success);
337 if (should_save_state_to_file) {
341 std::move(loaded_callback).Run();
344 BrowsingTopicsState::ParseResult BrowsingTopicsState::ParseValue(
345 const base::Value& value) {
348 const base::Value::Dict* dict_value = value.GetIfDict();
350 return ParseResult{.success = false, .should_save_state_to_file = true};
353 const std::string* hex_encoded_hmac_key =
354 dict_value->FindString(kHexEncodedHmacKeyNameKey);
355 if (!hex_encoded_hmac_key) {
356 return ParseResult{.success = false, .should_save_state_to_file = true};
359 if (!base::HexStringToSpan(*hex_encoded_hmac_key, hmac_key_)) {
360 // `HexStringToSpan` may partially fill the `hmac_key_` up until the
361 // failure. Reset it to empty.
363 return ParseResult{.success = false, .should_save_state_to_file = true};
366 const base::Value::List* epochs_value = dict_value->FindList(kEpochsNameKey);
368 return ParseResult{.success = false, .should_save_state_to_file = true};
371 for (const base::Value& epoch_value : *epochs_value) {
372 const base::Value::Dict* epoch_dict_value = epoch_value.GetIfDict();
373 if (!epoch_dict_value) {
374 return ParseResult{.success = false, .should_save_state_to_file = true};
377 epochs_.push_back(EpochTopics::FromDictValue(*epoch_dict_value));
380 for (const EpochTopics& epoch : epochs_) {
381 // If any preexisting epoch's version is incompatible with the current
382 // version, start with a fresh `epoch_`.
383 if (!AreConfigVersionsCompatible(epoch.config_version(),
384 CurrentConfigVersion())) {
386 return ParseResult{.success = true, .should_save_state_to_file = true};
390 const base::Value* next_scheduled_calculation_time_value =
391 dict_value->Find(kNextScheduledCalculationTimeNameKey);
392 if (!next_scheduled_calculation_time_value) {
393 return ParseResult{.success = false, .should_save_state_to_file = true};
396 next_scheduled_calculation_time_ =
397 base::ValueToTime(next_scheduled_calculation_time_value).value();
399 return ParseResult{.success = true, .should_save_state_to_file = false};
402 } // namespace browsing_topics