Upload upstream chromium 114.0.5735.31
[platform/framework/web/chromium-efl.git] / components / ukm / ukm_service.cc
1 // Copyright 2017 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/ukm/ukm_service.h"
6
7 #include <memory>
8 #include <string>
9 #include <unordered_set>
10 #include <utility>
11
12 #include "base/feature_list.h"
13 #include "base/functional/bind.h"
14 #include "base/metrics/field_trial.h"
15 #include "base/metrics/field_trial_params.h"
16 #include "base/metrics/histogram_functions.h"
17 #include "base/metrics/histogram_macros.h"
18 #include "base/rand_util.h"
19 #include "base/time/time.h"
20 #include "build/build_config.h"
21 #include "components/metrics/log_decoder.h"
22 #include "components/metrics/metrics_features.h"
23 #include "components/metrics/metrics_log.h"
24 #include "components/metrics/metrics_service_client.h"
25 #include "components/metrics/ukm_demographic_metrics_provider.h"
26 #include "components/prefs/pref_registry_simple.h"
27 #include "components/prefs/pref_service.h"
28 #include "components/ukm/scheme_constants.h"
29 #include "components/ukm/ukm_pref_names.h"
30 #include "components/ukm/ukm_recorder_impl.h"
31 #include "components/ukm/ukm_rotation_scheduler.h"
32 #include "services/metrics/public/cpp/delegating_ukm_recorder.h"
33 #include "services/metrics/public/cpp/ukm_recorder_client_interface_registry.h"
34 #include "services/metrics/public/cpp/ukm_source_id.h"
35 #include "third_party/metrics_proto/ukm/report.pb.h"
36 #include "third_party/metrics_proto/user_demographics.pb.h"
37
38 namespace ukm {
39
40 namespace {
41
42 // Generates a new client id and stores it in prefs.
43 uint64_t GenerateAndStoreClientId(PrefService* pref_service) {
44   uint64_t client_id = 0;
45   while (!client_id)
46     client_id = base::RandUint64();
47   pref_service->SetUint64(prefs::kUkmClientId, client_id);
48
49   // Also reset the session id counter.
50   pref_service->SetInteger(prefs::kUkmSessionId, 0);
51   return client_id;
52 }
53
54 uint64_t LoadOrGenerateAndStoreClientId(PrefService* pref_service,
55                                         uint64_t external_client_id) {
56   // If external_client_id is present, save to pref service for
57   // consistency purpose and return it as client id.
58   if (external_client_id) {
59     pref_service->SetUint64(prefs::kUkmClientId, external_client_id);
60     return external_client_id;
61   }
62
63   uint64_t client_id = pref_service->GetUint64(prefs::kUkmClientId);
64   // The pref is stored as a string and GetUint64() uses base::StringToUint64()
65   // to convert it. base::StringToUint64() will treat a negative value as
66   // underflow, which results in 0 (the minimum Uint64 value).
67   if (client_id) {
68     UMA_HISTOGRAM_BOOLEAN("UKM.MigratedClientIdInt64ToUInt64", false);
69     return client_id;
70   }
71
72   // Since client_id was 0, the pref value may have been negative. Attempt to
73   // get it as an Int64 to migrate it to Uint64.
74   client_id = pref_service->GetInt64(prefs::kUkmClientId);
75   if (client_id) {
76     pref_service->SetUint64(prefs::kUkmClientId, client_id);
77     UMA_HISTOGRAM_BOOLEAN("UKM.MigratedClientIdInt64ToUInt64", true);
78     return client_id;
79   }
80
81   // The client_id is still 0, so it wasn't set.
82   return GenerateAndStoreClientId(pref_service);
83 }
84
85 int32_t LoadAndIncrementSessionId(PrefService* pref_service) {
86   int32_t session_id = pref_service->GetInteger(prefs::kUkmSessionId);
87   ++session_id;  // Increment session id, once per session.
88   pref_service->SetInteger(prefs::kUkmSessionId, session_id);
89   return session_id;
90 }
91
// Removes every element for which |predicate| returns true.
//
// |elements| and |mutable_elements| must refer to the same underlying
// container (read-only view and mutable pointer respectively). The container
// is partitioned in place: elements to keep are gathered at the front,
// elements to drop at the back, and the back is then truncated with a single
// DeleteSubrange() call. The relative order of kept elements is not preserved.
//
// Required container interface: empty(), size(), Get(int) on |elements|;
// SwapElements(int, int) and DeleteSubrange(int, int) on |mutable_elements|.
template <typename Predicate, typename ReadElements, typename WriteElements>
void FilterReportElements(Predicate predicate,
                          const ReadElements& elements,
                          WriteElements* mutable_elements) {
  if (elements.empty())
    return;

  const int entries_size = elements.size();
  // Invariant: everything in [0, start) is kept, everything in
  // (end, entries_size) is dropped. [start, end] is still unclassified.
  int start = 0;
  int end = entries_size - 1;
  while (true) {
    // Skip over leading elements that are kept. Using `<=` makes sure the
    // element at |end| is examined too, so no element is left unclassified.
    while (start <= end && !predicate(elements.Get(start))) {
      start++;
    }
    // Skip over trailing elements that are dropped. When this loop stops with
    // start == end, that element is known to satisfy the predicate.
    while (start < end && predicate(elements.Get(end))) {
      end--;
    }
    if (start >= end)
      break;

    // |start| is a dropped element and |end| a kept one: exchange them.
    mutable_elements->SwapElements(start, end);
    start++;
    end--;
  }
  // |start| is now the index of the first dropped element (or entries_size).
  mutable_elements->DeleteSubrange(start, entries_size - start);
}
119
120 template <typename Predicate>
121 void PurgeDataFromUnsentLogStore(metrics::UnsentLogStore* ukm_log_store,
122                                  Predicate source_purging_condition) {
123   for (size_t index = 0; index < ukm_log_store->size(); index++) {
124     // Decode log data from store back into a Report.
125     Report report;
126     bool decode_success = metrics::DecodeLogDataToProto(
127         ukm_log_store->GetLogAtIndex(index), &report);
128     DCHECK(decode_success);
129
130     std::unordered_set<SourceId> relevant_source_ids;
131
132     // Grab ids of all sources satisfying the condition for purging.
133     for (const auto& source : report.sources()) {
134       if (source_purging_condition(source)) {
135         relevant_source_ids.insert(source.id());
136       }
137     }
138     if (relevant_source_ids.empty())
139       continue;
140
141     // Remove all relevant sources from the report.
142     FilterReportElements(
143         [&](const Source& element) {
144           return relevant_source_ids.count(element.id());
145         },
146         report.sources(), report.mutable_sources());
147
148     // Remove all entries originating from these sources.
149     FilterReportElements(
150         [&](const Entry& element) {
151           return relevant_source_ids.count(element.source_id());
152         },
153         report.entries(), report.mutable_entries());
154
155     std::string reserialized_log_data =
156         UkmService::SerializeReportProtoToString(&report);
157
158     // Replace the compressed log in the store by its filtered version.
159     const std::string old_compressed_log_data =
160         ukm_log_store->ReplaceLogAtIndex(index, reserialized_log_data,
161                                          metrics::LogMetadata());
162
163     // Reached here only if some Sources satisfied the condition for purging, so
164     // reserialized data should now be different.
165     DCHECK(ukm_log_store->GetLogAtIndex(index) != old_compressed_log_data);
166   }
167 }
168
169 }  // namespace
170
171 // static
172 BASE_FEATURE(kReportUserNoisedUserBirthYearAndGender,
173              "UkmReportNoisedUserBirthYearAndGender",
174              base::FEATURE_ENABLED_BY_DEFAULT);
175
176 bool UkmService::LogCanBeParsed(const std::string& serialized_data) {
177   Report report;
178   bool report_parse_successful = report.ParseFromString(serialized_data);
179   if (!report_parse_successful)
180     return false;
181   // Make sure the reserialized log from this |report| matches the input
182   // |serialized_data|.
183   std::string reserialized_from_report;
184   report.SerializeToString(&reserialized_from_report);
185   return reserialized_from_report == serialized_data;
186 }
187
188 std::string UkmService::SerializeReportProtoToString(Report* report) {
189   std::string serialized_full_log;
190   report->SerializeToString(&serialized_full_log);
191
192   // This allows catching errors with bad UKM serialization we've seen before
193   // that would otherwise only be noticed on the server.
194   DCHECK(UkmService::LogCanBeParsed(serialized_full_log));
195   return serialized_full_log;
196 }
197
198 UkmService::UkmService(PrefService* pref_service,
199                        metrics::MetricsServiceClient* client,
200                        std::unique_ptr<metrics::UkmDemographicMetricsProvider>
201                            demographics_provider,
202                        uint64_t external_client_id)
203     : recorder_client_registry_(
204           std::make_unique<metrics::UkmRecorderClientInterfaceRegistry>()),
205       pref_service_(pref_service),
206       external_client_id_(external_client_id),
207       client_(client),
208       demographics_provider_(std::move(demographics_provider)),
209       reporting_service_(client, pref_service),
210       task_runner_(base::SequencedTaskRunner::GetCurrentDefault()) {
211   DCHECK(pref_service_);
212   DCHECK(client_);
213   DVLOG(1) << "UkmService::Constructor";
214   reporting_service_.Initialize();
215
216   if (base::FeatureList::IsEnabled(
217           metrics::features::kMetricsClearLogsOnClonedInstall)) {
218     cloned_install_subscription_ = client->AddOnClonedInstallDetectedCallback(
219         base::BindOnce(&UkmService::OnClonedInstallDetected,
220                        self_ptr_factory_.GetWeakPtr()));
221   }
222
223   base::RepeatingClosure rotate_callback = base::BindRepeating(
224       &UkmService::RotateLog, self_ptr_factory_.GetWeakPtr());
225   // MetricsServiceClient outlives UkmService, and
226   // MetricsReportingScheduler is tied to the lifetime of |this|.
227   const base::RepeatingCallback<base::TimeDelta(void)>&
228       get_upload_interval_callback =
229           base::BindRepeating(&metrics::MetricsServiceClient::GetUploadInterval,
230                               base::Unretained(client_));
231   bool fast_startup_for_testing = client_->ShouldStartUpFastForTesting();
232   scheduler_ = std::make_unique<UkmRotationScheduler>(
233       rotate_callback, fast_startup_for_testing, get_upload_interval_callback);
234   InitDecodeMap();
235
236   DelegatingUkmRecorder::Get()->AddDelegate(self_ptr_factory_.GetWeakPtr());
237 }
238
239 UkmService::~UkmService() {
240   DisableReporting();
241   DelegatingUkmRecorder::Get()->RemoveDelegate(this);
242 }
243
244 void UkmService::Initialize() {
245   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
246   DCHECK(!initialize_started_);
247   DVLOG(1) << "UkmService::Initialize";
248   initialize_started_ = true;
249
250   DCHECK_EQ(0, report_count_);
251   if (client_->ShouldResetClientIdsOnClonedInstall()) {
252     ResetClientState(ResetReason::kClonedInstall);
253   } else {
254     client_id_ =
255         LoadOrGenerateAndStoreClientId(pref_service_, external_client_id_);
256     session_id_ = LoadAndIncrementSessionId(pref_service_);
257   }
258
259   metrics_providers_.Init();
260
261   StartInitTask();
262 }
263
264 void UkmService::EnableReporting() {
265   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
266   DVLOG(1) << "UkmService::EnableReporting";
267   if (reporting_service_.reporting_active())
268     return;
269
270   log_creation_time_ = base::TimeTicks::Now();
271   metrics_providers_.OnRecordingEnabled();
272
273   if (!initialize_started_)
274     Initialize();
275   scheduler_->Start();
276   reporting_service_.EnableReporting();
277 }
278
279 void UkmService::DisableReporting() {
280   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
281   DVLOG(1) << "UkmService::DisableReporting";
282
283   reporting_service_.DisableReporting();
284
285   metrics_providers_.OnRecordingDisabled();
286
287   scheduler_->Stop();
288   Flush(metrics::MetricsLogsEventManager::CreateReason::kServiceShutdown);
289 }
290
291 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
292 void UkmService::OnAppEnterForeground() {
293   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
294   DVLOG(1) << "UkmService::OnAppEnterForeground";
295
296   reporting_service_.SetIsInForegound(true);
297
298   // If initialize_started_ is false, UKM has not yet been started, so bail. The
299   // scheduler will instead be started via EnableReporting().
300   if (!initialize_started_)
301     return;
302
303   scheduler_->Start();
304 }
305
306 void UkmService::OnAppEnterBackground() {
307   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
308   DVLOG(1) << "UkmService::OnAppEnterBackground";
309
310   reporting_service_.SetIsInForegound(false);
311
312   if (!initialize_started_)
313     return;
314
315   scheduler_->Stop();
316
317   // Give providers a chance to persist ukm data as part of being backgrounded.
318   metrics_providers_.OnAppEnterBackground();
319
320   Flush(metrics::MetricsLogsEventManager::CreateReason::kBackgrounded);
321 }
322 #endif
323
324 void UkmService::Flush(metrics::MetricsLogsEventManager::CreateReason reason) {
325   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
326   if (initialize_complete_)
327     BuildAndStoreLog(reason);
328   reporting_service_.ukm_log_store()->TrimAndPersistUnsentLogs(
329       /*overwrite_in_memory_store=*/true);
330 }
331
332 void UkmService::Purge() {
333   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
334   DVLOG(1) << "UkmService::Purge";
335   reporting_service_.ukm_log_store()->Purge();
336   UkmRecorderImpl::Purge();
337 }
338
339 void UkmService::PurgeExtensionsData() {
340   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
341   DVLOG(1) << "UkmService::PurgeExtensionsData";
342   // Filter out any extension-related data from the serialized logs in the
343   // UnsentLogStore for uploading, base on having kExtensionScheme URL scheme.
344   PurgeDataFromUnsentLogStore(
345       reporting_service_.ukm_log_store(), [&](const Source& source) {
346         // Check if any URL on the Source has the kExtensionScheme URL scheme.
347         // It is possible that only one of multiple URLs does due to redirect,
348         // in this case, we should still purge the source.
349         for (const auto& url_info : source.urls()) {
350           if (GURL(url_info.url()).SchemeIs(kExtensionScheme))
351             return true;
352         }
353         return false;
354       });
355
356   // Purge data currently in the recordings intended for the next
357   // ukm::Report.
358   UkmRecorderImpl::PurgeRecordingsWithUrlScheme(kExtensionScheme);
359 }
360
361 void UkmService::PurgeAppsData() {
362   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
363   DVLOG(1) << "UkmService::PurgeAppsData";
364   // Filter out any apps-related data from the serialized logs in the
365   // UnsentLogStore for uploading.
366   // Also purge based on source id type, because some apps don't use app://
367   // scheme.
368   // For example, OS Settings is an ChromeOS app with "chrome://os-settings" as
369   // its URL.
370   PurgeDataFromUnsentLogStore(
371       reporting_service_.ukm_log_store(), [&](const Source& source) {
372         if (GetSourceIdType(source.id()) == SourceIdType::APP_ID)
373           return true;
374         for (const auto& url_info : source.urls()) {
375           if (GURL(url_info.url()).SchemeIs(kAppScheme))
376             return true;
377         }
378         return false;
379       });
380
381   // Purge data currently in the recordings intended for the next ukm::Report.
382   UkmRecorderImpl::PurgeRecordingsWithUrlScheme(kAppScheme);
383   UkmRecorderImpl::PurgeRecordingsWithSourceIdType(SourceIdType::APP_ID);
384 }
385
386 void UkmService::PurgeMsbbData() {
387   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
388   // Filter out any MSBB-related data from the serialized logs in the
389   // UnsentLogStore for uploading.
390   PurgeDataFromUnsentLogStore(
391       reporting_service_.ukm_log_store(), [&](const Source& source) {
392         return UkmRecorderImpl::GetConsentType(GetSourceIdType(source.id())) ==
393                MSBB;
394       });
395
396   // Purge data currently in the recordings intended for the next ukm::Report.
397   UkmRecorderImpl::PurgeRecordingsWithMsbbSources();
398 }
399
400 void UkmService::ResetClientState(ResetReason reason) {
401   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
402
403   UMA_HISTOGRAM_ENUMERATION("UKM.ResetReason", reason);
404
405   if (external_client_id_) {
406     client_id_ = external_client_id_;
407     pref_service_->SetUint64(prefs::kUkmClientId, client_id_);
408   } else {
409     client_id_ = GenerateAndStoreClientId(pref_service_);
410   }
411
412   // Note: the session_id has already been cleared by GenerateAndStoreClientId.
413   session_id_ = LoadAndIncrementSessionId(pref_service_);
414   report_count_ = 0;
415
416   metrics_providers_.OnClientStateCleared();
417 }
418
419 void UkmService::OnClonedInstallDetected() {
420   // Purge all logs, as they may come from a previous install. Unfortunately,
421   // since the cloned install detector works asynchronously, it is possible that
422   // this is called after logs were already sent. However, practically speaking,
423   // this should not happen, since logs are only sent late into the session.
424   reporting_service_.ukm_log_store()->Purge();
425 }
426
427 void UkmService::RegisterMetricsProvider(
428     std::unique_ptr<metrics::MetricsProvider> provider) {
429   metrics_providers_.RegisterMetricsProvider(std::move(provider));
430 }
431
432 void UkmService::RegisterEventFilter(std::unique_ptr<UkmEntryFilter> filter) {
433   SetEntryFilter(std::move(filter));
434 }
435
436 // static
437 void UkmService::RegisterPrefs(PrefRegistrySimple* registry) {
438   registry->RegisterUint64Pref(prefs::kUkmClientId, 0);
439   registry->RegisterIntegerPref(prefs::kUkmSessionId, 0);
440   UkmReportingService::RegisterPrefs(registry);
441 }
442
443 void UkmService::OnRecorderParametersChanged() {
444   task_runner_->PostTask(
445       FROM_HERE, base::BindOnce(&UkmService::OnRecorderParametersChangedImpl,
446                                 self_ptr_factory_.GetWeakPtr()));
447 }
448
449 void UkmService::OnRecorderParametersChangedImpl() {
450   auto params = mojom::UkmRecorderParameters::New();
451   params->is_enabled = recording_enabled();
452
453   std::set<uint64_t> events = GetObservedEventHashes();
454   params->event_hash_bypass_list.insert(params->event_hash_bypass_list.end(),
455                                         events.begin(), events.end());
456   recorder_client_registry_->SetRecorderParameters(std::move(params));
457 }
458
459 void UkmService::StartInitTask() {
460   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
461   DVLOG(1) << "UkmService::StartInitTask";
462   metrics_providers_.AsyncInit(base::BindOnce(&UkmService::FinishedInitTask,
463                                               self_ptr_factory_.GetWeakPtr()));
464 }
465
466 void UkmService::FinishedInitTask() {
467   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
468   DVLOG(1) << "UkmService::FinishedInitTask";
469   initialize_complete_ = true;
470   scheduler_->InitTaskComplete();
471   if (initialization_complete_callback_) {
472     std::move(initialization_complete_callback_).Run();
473   }
474 }
475
476 void UkmService::RotateLog() {
477   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
478   DVLOG(1) << "UkmService::RotateLog";
479   if (!reporting_service_.ukm_log_store()->has_unsent_logs())
480     BuildAndStoreLog(metrics::MetricsLogsEventManager::CreateReason::kPeriodic);
481   reporting_service_.Start();
482   scheduler_->RotationFinished();
483 }
484
485 void UkmService::AddSyncedUserNoiseBirthYearAndGenderToReport(Report* report) {
486   if (!base::FeatureList::IsEnabled(kReportUserNoisedUserBirthYearAndGender) ||
487       !demographics_provider_) {
488     return;
489   }
490
491   demographics_provider_->ProvideSyncedUserNoisedBirthYearAndGenderToReport(
492       report);
493 }
494
495 void UkmService::BuildAndStoreLog(
496     metrics::MetricsLogsEventManager::CreateReason reason) {
497   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
498   DVLOG(1) << "UkmService::BuildAndStoreLog";
499
500   // This may add new UKMs. This means this needs to be done before the empty
501   // log suppression checks.
502   metrics_providers_.ProvideCurrentSessionUKMData();
503
504   // Suppress generating a log if we have no new data to include.
505   bool empty = sources().empty() && entries().empty();
506   UMA_HISTOGRAM_BOOLEAN("UKM.BuildAndStoreLogIsEmpty", empty);
507   if (empty)
508     return;
509
510   Report report;
511   report.set_client_id(client_id_);
512   report.set_session_id(session_id_);
513   report.set_report_id(++report_count_);
514
515   const auto product = static_cast<metrics::ChromeUserMetricsExtension_Product>(
516       client_->GetProduct());
517   // Only set the product if it differs from the default value.
518   if (product != report.product())
519     report.set_product(product);
520
521   StoreRecordingsInReport(&report);
522
523   metrics::MetricsLog::RecordCoreSystemProfile(client_,
524                                                report.mutable_system_profile());
525
526   metrics_providers_.ProvideSystemProfileMetricsWithLogCreationTime(
527       log_creation_time_, report.mutable_system_profile());
528
529   AddSyncedUserNoiseBirthYearAndGenderToReport(&report);
530
531   std::string serialized_log =
532       UkmService::SerializeReportProtoToString(&report);
533   metrics::LogMetadata log_metadata;
534   reporting_service_.ukm_log_store()->StoreLog(serialized_log, log_metadata,
535                                                reason);
536 }
537
538 void UkmService::SetInitializationCompleteCallbackForTesting(
539     base::OnceClosure callback) {
540   if (initialize_complete_) {
541     std::move(callback).Run();
542   } else {
543     // Store the callback to be invoked when initialization is complete later.
544     initialization_complete_callback_ = std::move(callback);
545   }
546 }
547
548 }  // namespace ukm