1 // Copyright 2017 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/ukm/ukm_service.h"
9 #include <unordered_set>
12 #include "base/feature_list.h"
13 #include "base/functional/bind.h"
14 #include "base/metrics/field_trial.h"
15 #include "base/metrics/field_trial_params.h"
16 #include "base/metrics/histogram_functions.h"
17 #include "base/metrics/histogram_macros.h"
18 #include "base/rand_util.h"
19 #include "base/time/time.h"
20 #include "build/build_config.h"
21 #include "components/metrics/log_decoder.h"
22 #include "components/metrics/metrics_features.h"
23 #include "components/metrics/metrics_log.h"
24 #include "components/metrics/metrics_service_client.h"
25 #include "components/metrics/ukm_demographic_metrics_provider.h"
26 #include "components/prefs/pref_registry_simple.h"
27 #include "components/prefs/pref_service.h"
28 #include "components/ukm/scheme_constants.h"
29 #include "components/ukm/ukm_pref_names.h"
30 #include "components/ukm/ukm_recorder_impl.h"
31 #include "components/ukm/ukm_rotation_scheduler.h"
32 #include "services/metrics/public/cpp/delegating_ukm_recorder.h"
33 #include "services/metrics/public/cpp/ukm_recorder_client_interface_registry.h"
34 #include "services/metrics/public/cpp/ukm_source_id.h"
35 #include "third_party/metrics_proto/ukm/report.pb.h"
36 #include "third_party/metrics_proto/user_demographics.pb.h"
42 // Generates a new client id and stores it in prefs.
43 uint64_t GenerateAndStoreClientId(PrefService* pref_service) {
44 uint64_t client_id = 0;
46 client_id = base::RandUint64();
47 pref_service->SetUint64(prefs::kUkmClientId, client_id);
49 // Also reset the session id counter.
50 pref_service->SetInteger(prefs::kUkmSessionId, 0);
54 uint64_t LoadOrGenerateAndStoreClientId(PrefService* pref_service,
55 uint64_t external_client_id) {
56 // If external_client_id is present, save to pref service for
57 // consistency purpose and return it as client id.
58 if (external_client_id) {
59 pref_service->SetUint64(prefs::kUkmClientId, external_client_id);
60 return external_client_id;
63 uint64_t client_id = pref_service->GetUint64(prefs::kUkmClientId);
64 // The pref is stored as a string and GetUint64() uses base::StringToUint64()
65 // to convert it. base::StringToUint64() will treat a negative value as
66 // underflow, which results in 0 (the minimum Uint64 value).
68 UMA_HISTOGRAM_BOOLEAN("UKM.MigratedClientIdInt64ToUInt64", false);
72 // Since client_id was 0, the pref value may have been negative. Attempt to
73 // get it as an Int64 to migrate it to Uint64.
74 client_id = pref_service->GetInt64(prefs::kUkmClientId);
76 pref_service->SetUint64(prefs::kUkmClientId, client_id);
77 UMA_HISTOGRAM_BOOLEAN("UKM.MigratedClientIdInt64ToUInt64", true);
81 // The client_id is still 0, so it wasn't set.
82 return GenerateAndStoreClientId(pref_service);
85 int32_t LoadAndIncrementSessionId(PrefService* pref_service) {
86 int32_t session_id = pref_service->GetInteger(prefs::kUkmSessionId);
87 ++session_id; // Increment session id, once per session.
88 pref_service->SetInteger(prefs::kUkmSessionId, session_id);
// Removes every element for which |predicate| returns true.
//
// |elements| and |mutable_elements| must be the read-only and mutable views of
// the same repeated field. The container type has to provide size(), Get(i),
// SwapElements(i, j) and DeleteSubrange(start, count).
//
// Elements to keep are compacted to the front, preserving their relative
// order; everything else ends up in the tail, which is deleted in one call.
// A front-to-back sweep is used instead of a two-ended swap so that every
// element is tested exactly once and nothing is deleted untested (for example
// a single element that should be kept).
template <typename Predicate, typename ReadElements, typename WriteElements>
void FilterReportElements(Predicate predicate,
                          const ReadElements& elements,
                          WriteElements* mutable_elements) {
  const int entries_size = elements.size();

  // Invariant: [0, kept) holds elements to keep and [kept, index) holds
  // elements that will be removed.
  int kept = 0;
  for (int index = 0; index < entries_size; ++index) {
    if (predicate(elements.Get(index)))
      continue;
    if (index != kept)
      mutable_elements->SwapElements(kept, index);
    ++kept;
  }

  // Nothing to truncate when no element matched (or the field was empty).
  if (kept < entries_size)
    mutable_elements->DeleteSubrange(kept, entries_size - kept);
}
120 template <typename Predicate>
121 void PurgeDataFromUnsentLogStore(metrics::UnsentLogStore* ukm_log_store,
122 Predicate source_purging_condition) {
123 for (size_t index = 0; index < ukm_log_store->size(); index++) {
124 // Decode log data from store back into a Report.
126 bool decode_success = metrics::DecodeLogDataToProto(
127 ukm_log_store->GetLogAtIndex(index), &report);
128 DCHECK(decode_success);
130 std::unordered_set<SourceId> relevant_source_ids;
132 // Grab ids of all sources satisfying the condition for purging.
133 for (const auto& source : report.sources()) {
134 if (source_purging_condition(source)) {
135 relevant_source_ids.insert(source.id());
138 if (relevant_source_ids.empty())
141 // Remove all relevant sources from the report.
142 FilterReportElements(
143 [&](const Source& element) {
144 return relevant_source_ids.count(element.id());
146 report.sources(), report.mutable_sources());
148 // Remove all entries originating from these sources.
149 FilterReportElements(
150 [&](const Entry& element) {
151 return relevant_source_ids.count(element.source_id());
153 report.entries(), report.mutable_entries());
155 std::string reserialized_log_data =
156 UkmService::SerializeReportProtoToString(&report);
158 // Replace the compressed log in the store by its filtered version.
159 const std::string old_compressed_log_data =
160 ukm_log_store->ReplaceLogAtIndex(index, reserialized_log_data,
161 metrics::LogMetadata());
163 // Reached here only if some Sources satisfied the condition for purging, so
164 // reserialized data should now be different.
165 DCHECK(ukm_log_store->GetLogAtIndex(index) != old_compressed_log_data);
// Feature "UkmReportNoisedUserBirthYearAndGender", enabled by default. It is
// checked by AddSyncedUserNoiseBirthYearAndGenderToReport() before demographic
// data is added to a report.
172 BASE_FEATURE(kReportUserNoisedUserBirthYearAndGender,
173 "UkmReportNoisedUserBirthYearAndGender",
174 base::FEATURE_ENABLED_BY_DEFAULT);
176 bool UkmService::LogCanBeParsed(const std::string& serialized_data) {
178 bool report_parse_successful = report.ParseFromString(serialized_data);
179 if (!report_parse_successful)
181 // Make sure the reserialized log from this |report| matches the input
182 // |serialized_data|.
183 std::string reserialized_from_report;
184 report.SerializeToString(&reserialized_from_report);
185 return reserialized_from_report == serialized_data;
// Serializes |report| into a string and returns it. In DCHECK-enabled builds
// the result is additionally checked with LogCanBeParsed(), i.e. it must parse
// back and reserialize to the same bytes.
188 std::string UkmService::SerializeReportProtoToString(Report* report) {
189 std::string serialized_full_log;
190 report->SerializeToString(&serialized_full_log);
192 // This allows catching errors with bad UKM serialization we've seen before
193 // that would otherwise only be noticed on the server.
194 DCHECK(UkmService::LogCanBeParsed(serialized_full_log));
195 return serialized_full_log;
// Constructs the service.
//   |pref_service|          - stored in pref_service_; must be non-null
//                             (DCHECKed).
//   |client|                - used to build the reporting service and to
//                             register the cloned-install callback.
//   |demographics_provider| - ownership is moved into demographics_provider_.
//   |external_client_id|    - stored in external_client_id_.
// The constructor initializes the reporting service, creates the rotation
// scheduler and registers this object as a DelegatingUkmRecorder delegate.
// NOTE(review): the body reads client_, but its initialization is not visible
// in this listing — confirm it is set from |client|.
198 UkmService::UkmService(PrefService* pref_service,
199 metrics::MetricsServiceClient* client,
200 std::unique_ptr<metrics::UkmDemographicMetricsProvider>
201 demographics_provider,
202 uint64_t external_client_id)
203 : recorder_client_registry_(
204 std::make_unique<metrics::UkmRecorderClientInterfaceRegistry>()),
205 pref_service_(pref_service),
206 external_client_id_(external_client_id),
208 demographics_provider_(std::move(demographics_provider)),
209 reporting_service_(client, pref_service),
210 task_runner_(base::SequencedTaskRunner::GetCurrentDefault()) {
211 DCHECK(pref_service_);
213 DVLOG(1) << "UkmService::Constructor";
214 reporting_service_.Initialize();
// When the feature is enabled, ask |client| to call OnClonedInstallDetected()
// (bound to a weak pointer to this object) if a cloned install is detected.
216 if (base::FeatureList::IsEnabled(
217 metrics::features::kMetricsClearLogsOnClonedInstall)) {
218 cloned_install_subscription_ = client->AddOnClonedInstallDetectedCallback(
219 base::BindOnce(&UkmService::OnClonedInstallDetected,
220 self_ptr_factory_.GetWeakPtr()));
// The scheduler invokes RotateLog() through a weak pointer to this object.
223 base::RepeatingClosure rotate_callback = base::BindRepeating(
224 &UkmService::RotateLog, self_ptr_factory_.GetWeakPtr());
225 // MetricsServiceClient outlives UkmService, and
226 // MetricsReportingScheduler is tied to the lifetime of |this|.
227 const base::RepeatingCallback<base::TimeDelta(void)>&
228 get_upload_interval_callback =
229 base::BindRepeating(&metrics::MetricsServiceClient::GetUploadInterval,
230 base::Unretained(client_));
231 bool fast_startup_for_testing = client_->ShouldStartUpFastForTesting();
232 scheduler_ = std::make_unique<UkmRotationScheduler>(
233 rotate_callback, fast_startup_for_testing, get_upload_interval_callback);
// Register as a delegate of the DelegatingUkmRecorder; undone in the
// destructor.
236 DelegatingUkmRecorder::Get()->AddDelegate(self_ptr_factory_.GetWeakPtr());
// Removes this object from the DelegatingUkmRecorder's delegates (it was added
// in the constructor).
239 UkmService::~UkmService() {
241 DelegatingUkmRecorder::Get()->RemoveDelegate(this);
// Begins initialization of the service. Must be called on the owning sequence
// and at most once (both DCHECKed); no report may have been built yet.
// Sets up the client and session ids, then initializes the metrics providers.
244 void UkmService::Initialize() {
245 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
246 DCHECK(!initialize_started_);
247 DVLOG(1) << "UkmService::Initialize";
248 initialize_started_ = true;
250 DCHECK_EQ(0, report_count_);
// If the client asks for it, a cloned install gets brand-new client state.
251 if (client_->ShouldResetClientIdsOnClonedInstall()) {
252 ResetClientState(ResetReason::kClonedInstall);
// NOTE(review): presumably this is the alternative (else) path and the result
// of the next call is assigned to client_id_; neither is visible in this
// listing — confirm.
255 LoadOrGenerateAndStoreClientId(pref_service_, external_client_id_);
256 session_id_ = LoadAndIncrementSessionId(pref_service_);
259 metrics_providers_.Init();
// Turns reporting on: records the current time as the log creation time,
// notifies the metrics providers that recording is enabled and enables the
// reporting service. Must be called on the owning sequence.
264 void UkmService::EnableReporting() {
265 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
266 DVLOG(1) << "UkmService::EnableReporting";
// NOTE(review): presumably an early return when reporting is already active;
// the guarded statement is not visible in this listing — confirm.
267 if (reporting_service_.reporting_active())
// This timestamp is later passed to the providers when a log is built.
270 log_creation_time_ = base::TimeTicks::Now();
271 metrics_providers_.OnRecordingEnabled();
// NOTE(review): the statement guarded by this check is not visible in this
// listing. The comment in OnAppEnterForeground() says the scheduler is started
// via EnableReporting(), so initialization/scheduler start-up is expected
// here — confirm.
273 if (!initialize_started_)
276 reporting_service_.EnableReporting();
// Turns reporting off: disables the reporting service, notifies the metrics
// providers that recording is disabled, and finally flushes with the
// kServiceShutdown reason. Must be called on the owning sequence.
279 void UkmService::DisableReporting() {
280 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
281 DVLOG(1) << "UkmService::DisableReporting";
283 reporting_service_.DisableReporting();
285 metrics_providers_.OnRecordingDisabled();
// Flush() builds a final log if initialization completed and persists the
// unsent logs.
288 Flush(metrics::MetricsLogsEventManager::CreateReason::kServiceShutdown);
291 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
// Android/iOS only. Tells the reporting service that the app is now in the
// foreground. Must be called on the owning sequence.
292 void UkmService::OnAppEnterForeground() {
293 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
294 DVLOG(1) << "UkmService::OnAppEnterForeground";
296 reporting_service_.SetIsInForegound(true);
298 // If initialize_started_ is false, UKM has not yet been started, so bail. The
299 // scheduler will instead be started via EnableReporting().
// NOTE(review): the bail-out statement and whatever follows it (presumably
// starting the scheduler) are not visible in this listing — confirm.
300 if (!initialize_started_)
// Android/iOS only. Tells the reporting service that the app left the
// foreground, lets the metrics providers persist their data and flushes with
// the kBackgrounded reason. Must be called on the owning sequence.
306 void UkmService::OnAppEnterBackground() {
307 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
308 DVLOG(1) << "UkmService::OnAppEnterBackground";
310 reporting_service_.SetIsInForegound(false);
// NOTE(review): presumably bails out when UKM was never started; the guarded
// statement is not visible in this listing — confirm.
312 if (!initialize_started_)
317 // Give providers a chance to persist ukm data as part of being backgrounded.
318 metrics_providers_.OnAppEnterBackground();
320 Flush(metrics::MetricsLogsEventManager::CreateReason::kBackgrounded);
// Builds and stores a log tagged with |reason| if initialization has
// completed, then trims and persists the unsent logs, also overwriting the
// in-memory store. Must be called on the owning sequence.
324 void UkmService::Flush(metrics::MetricsLogsEventManager::CreateReason reason) {
325 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Only the log building is conditional; persisting below always happens.
326 if (initialize_complete_)
327 BuildAndStoreLog(reason);
328 reporting_service_.ukm_log_store()->TrimAndPersistUnsentLogs(
329 /*overwrite_in_memory_store=*/true);
// Purges the unsent log store and then calls UkmRecorderImpl::Purge(). Must
// be called on the owning sequence.
332 void UkmService::Purge() {
333 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
334 DVLOG(1) << "UkmService::Purge";
335 reporting_service_.ukm_log_store()->Purge();
336 UkmRecorderImpl::Purge();
// Removes extension-related data, both from the logs waiting in the unsent
// log store and from the recordings not yet written to a report. A source
// counts as extension-related if any of its URLs uses kExtensionScheme. Must
// be called on the owning sequence.
339 void UkmService::PurgeExtensionsData() {
340 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
341 DVLOG(1) << "UkmService::PurgeExtensionsData";
342 // Filter out any extension-related data from the serialized logs in the
343 // UnsentLogStore for uploading, based on having kExtensionScheme URL scheme.
344 PurgeDataFromUnsentLogStore(
345 reporting_service_.ukm_log_store(), [&](const Source& source) {
346 // Check if any URL on the Source has the kExtensionScheme URL scheme.
347 // It is possible that only one of multiple URLs does due to redirect,
348 // in this case, we should still purge the source.
// NOTE(review): the return statements of this predicate are not visible in
// this listing; presumably true on the first matching URL and false
// otherwise — confirm.
349 for (const auto& url_info : source.urls()) {
350 if (GURL(url_info.url()).SchemeIs(kExtensionScheme))
356 // Purge data currently in the recordings intended for the next
358 UkmRecorderImpl::PurgeRecordingsWithUrlScheme(kExtensionScheme);
// Removes app-related data, both from the logs waiting in the unsent log
// store and from the recordings not yet written to a report. A source is
// considered app-related based on its source id type (APP_ID) or on one of
// its URLs using kAppScheme. Must be called on the owning sequence.
361 void UkmService::PurgeAppsData() {
362 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
363 DVLOG(1) << "UkmService::PurgeAppsData";
364 // Filter out any apps-related data from the serialized logs in the
365 // UnsentLogStore for uploading.
366 // Also purge based on source id type, because some apps don't use app://
368 // For example, OS Settings is a ChromeOS app with "chrome://os-settings" as
// NOTE(review): the return statements of this predicate are not visible in
// this listing; presumably true for either match and false otherwise —
// confirm.
370 PurgeDataFromUnsentLogStore(
371 reporting_service_.ukm_log_store(), [&](const Source& source) {
372 if (GetSourceIdType(source.id()) == SourceIdType::APP_ID)
374 for (const auto& url_info : source.urls()) {
375 if (GURL(url_info.url()).SchemeIs(kAppScheme))
381 // Purge data currently in the recordings intended for the next ukm::Report.
382 UkmRecorderImpl::PurgeRecordingsWithUrlScheme(kAppScheme);
383 UkmRecorderImpl::PurgeRecordingsWithSourceIdType(SourceIdType::APP_ID);
// Removes MSBB-related data, both from the logs waiting in the unsent log
// store and from the recordings not yet written to a report. Sources are
// selected by the consent type derived from their source id type. Must be
// called on the owning sequence.
386 void UkmService::PurgeMsbbData() {
387 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
388 // Filter out any MSBB-related data from the serialized logs in the
389 // UnsentLogStore for uploading.
390 PurgeDataFromUnsentLogStore(
391 reporting_service_.ukm_log_store(), [&](const Source& source) {
// NOTE(review): the right-hand side of this comparison is not visible in this
// listing; presumably the MSBB consent type — confirm.
392 return UkmRecorderImpl::GetConsentType(GetSourceIdType(source.id())) ==
396 // Purge data currently in the recordings intended for the next ukm::Report.
397 UkmRecorderImpl::PurgeRecordingsWithMsbbSources();
// Replaces the client id and session id with new values and notifies the
// metrics providers that client state was cleared. |reason| is only recorded
// in the "UKM.ResetReason" histogram. Must be called on the owning sequence.
400 void UkmService::ResetClientState(ResetReason reason) {
401 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
403 UMA_HISTOGRAM_ENUMERATION("UKM.ResetReason", reason);
// An externally supplied client id takes the place of a generated one and is
// mirrored into prefs.
405 if (external_client_id_) {
406 client_id_ = external_client_id_;
407 pref_service_->SetUint64(prefs::kUkmClientId, client_id_);
// NOTE(review): as listed, the next line would overwrite the external id just
// stored; an `else` separating the two paths is presumably missing from this
// listing — confirm.
409 client_id_ = GenerateAndStoreClientId(pref_service_);
412 // Note: the session_id has already been cleared by GenerateAndStoreClientId.
// NOTE(review): that only holds on the generated-id path; on the external-id
// path the stored session id is not reset before being incremented — confirm
// this is intended.
413 session_id_ = LoadAndIncrementSessionId(pref_service_);
416 metrics_providers_.OnClientStateCleared();
// Callback registered in the constructor for cloned-install detection. Purges
// every log held in the unsent log store.
419 void UkmService::OnClonedInstallDetected() {
420 // Purge all logs, as they may come from a previous install. Unfortunately,
421 // since the cloned install detector works asynchronously, it is possible that
422 // this is called after logs were already sent. However, practically speaking,
423 // this should not happen, since logs are only sent late into the session.
424 reporting_service_.ukm_log_store()->Purge();
// Hands ownership of |provider| to metrics_providers_.
427 void UkmService::RegisterMetricsProvider(
428 std::unique_ptr<metrics::MetricsProvider> provider) {
429 metrics_providers_.RegisterMetricsProvider(std::move(provider));
// Installs |filter| as the entry filter by forwarding it, with ownership, to
// SetEntryFilter().
432 void UkmService::RegisterEventFilter(std::unique_ptr<UkmEntryFilter> filter) {
433 SetEntryFilter(std::move(filter));
// Registers the prefs used by this service on |registry|: the client id and
// the session id, both defaulting to 0, plus the prefs of UkmReportingService.
437 void UkmService::RegisterPrefs(PrefRegistrySimple* registry) {
// A client id of 0 is what the loading code treats as "not set".
438 registry->RegisterUint64Pref(prefs::kUkmClientId, 0);
439 registry->RegisterIntegerPref(prefs::kUkmSessionId, 0);
440 UkmReportingService::RegisterPrefs(registry);
// Posts OnRecorderParametersChangedImpl() to task_runner_, bound to a weak
// pointer to this object, instead of running it inline.
443 void UkmService::OnRecorderParametersChanged() {
444 task_runner_->PostTask(
445 FROM_HERE, base::BindOnce(&UkmService::OnRecorderParametersChangedImpl,
446 self_ptr_factory_.GetWeakPtr()));
// Builds a fresh UkmRecorderParameters from the current recording state and
// the observed event hashes, and passes it to the recorder client registry.
449 void UkmService::OnRecorderParametersChangedImpl() {
450 auto params = mojom::UkmRecorderParameters::New();
451 params->is_enabled = recording_enabled();
// Append the observed event hashes to the bypass list.
453 std::set<uint64_t> events = GetObservedEventHashes();
454 params->event_hash_bypass_list.insert(params->event_hash_bypass_list.end(),
455 events.begin(), events.end());
456 recorder_client_registry_->SetRecorderParameters(std::move(params));
// Starts asynchronous initialization of the metrics providers.
// FinishedInitTask() is passed as the completion callback, bound to a weak
// pointer to this object. Must be called on the owning sequence.
459 void UkmService::StartInitTask() {
460 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
461 DVLOG(1) << "UkmService::StartInitTask";
462 metrics_providers_.AsyncInit(base::BindOnce(&UkmService::FinishedInitTask,
463 self_ptr_factory_.GetWeakPtr()));
// Completion callback of StartInitTask(). Marks initialization as complete,
// informs the scheduler and runs the testing callback if one was registered.
// Must be called on the owning sequence.
466 void UkmService::FinishedInitTask() {
467 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
468 DVLOG(1) << "UkmService::FinishedInitTask";
// From here on Flush() is allowed to build logs.
469 initialize_complete_ = true;
470 scheduler_->InitTaskComplete();
// Set via SetInitializationCompleteCallbackForTesting(); run at most once.
471 if (initialization_complete_callback_) {
472 std::move(initialization_complete_callback_).Run();
// Rotation callback handed to the scheduler in the constructor. Builds a new
// periodic log only when the store has no unsent logs, then starts the
// reporting service and tells the scheduler that rotation finished. Must be
// called on the owning sequence.
476 void UkmService::RotateLog() {
477 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
478 DVLOG(1) << "UkmService::RotateLog";
// Only the log building is conditional; the two calls below always run.
479 if (!reporting_service_.ukm_log_store()->has_unsent_logs())
480 BuildAndStoreLog(metrics::MetricsLogsEventManager::CreateReason::kPeriodic);
481 reporting_service_.Start();
482 scheduler_->RotationFinished();
// Asks the demographics provider to add the synced user's noised birth year
// and gender to a report. This requires the
// kReportUserNoisedUserBirthYearAndGender feature to be enabled and a
// demographics provider to be present.
485 void UkmService::AddSyncedUserNoiseBirthYearAndGenderToReport(Report* report) {
// NOTE(review): presumably returns early when the condition holds; the guarded
// statement is not visible in this listing — confirm.
486 if (!base::FeatureList::IsEnabled(kReportUserNoisedUserBirthYearAndGender) ||
487 !demographics_provider_) {
// NOTE(review): the argument list of this call is not visible in this listing;
// presumably |report| — confirm.
491 demographics_provider_->ProvideSyncedUserNoisedBirthYearAndGenderToReport(
// Builds a Report from the current recordings and stores its serialized form
// in the unsent log store. The report carries the client id, session id, an
// incremented report id, the product (when non-default), the recordings, the
// system profile and, when available, noised demographics. Must be called on
// the owning sequence.
495 void UkmService::BuildAndStoreLog(
496 metrics::MetricsLogsEventManager::CreateReason reason) {
497 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
498 DVLOG(1) << "UkmService::BuildAndStoreLog";
500 // This may add new UKMs. This means this needs to be done before the empty
501 // log suppression checks.
502 metrics_providers_.ProvideCurrentSessionUKMData();
504 // Suppress generating a log if we have no new data to include.
// NOTE(review): the early exit on |empty| and the declaration of |report| are
// not visible in this listing — confirm.
505 bool empty = sources().empty() && entries().empty();
506 UMA_HISTOGRAM_BOOLEAN("UKM.BuildAndStoreLogIsEmpty", empty);
511 report.set_client_id(client_id_);
512 report.set_session_id(session_id_);
// report_count_ is pre-incremented, so a report's id is the new count.
513 report.set_report_id(++report_count_);
515 const auto product = static_cast<metrics::ChromeUserMetricsExtension_Product>(
516 client_->GetProduct());
517 // Only set the product if it differs from the default value.
518 if (product != report.product())
519 report.set_product(product);
// Copy the accumulated recordings into the report.
521 StoreRecordingsInReport(&report);
523 metrics::MetricsLog::RecordCoreSystemProfile(client_,
524 report.mutable_system_profile());
// log_creation_time_ is the timestamp taken in EnableReporting().
526 metrics_providers_.ProvideSystemProfileMetricsWithLogCreationTime(
527 log_creation_time_, report.mutable_system_profile());
529 AddSyncedUserNoiseBirthYearAndGenderToReport(&report);
531 std::string serialized_log =
532 UkmService::SerializeReportProtoToString(&report);
533 metrics::LogMetadata log_metadata;
// NOTE(review): the remaining argument(s) of this call are not visible in this
// listing; presumably |reason| — confirm.
534 reporting_service_.ukm_log_store()->StoreLog(serialized_log, log_metadata,
538 void UkmService::SetInitializationCompleteCallbackForTesting(
539 base::OnceClosure callback) {
540 if (initialize_complete_) {
541 std::move(callback).Run();
543 // Store the callback to be invoked when initialization is complete later.
544 initialization_complete_callback_ = std::move(callback);