Upload upstream chromium 114.0.5735.31
[platform/framework/web/chromium-efl.git] / components / ukm / ukm_recorder_impl.cc
1 // Copyright 2017 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/ukm/ukm_recorder_impl.h"
6
7 #include <memory>
8 #include <string>
9 #include <unordered_map>
10 #include <utility>
11
12 #include "base/component_export.h"
13 #include "base/containers/contains.h"
14 #include "base/feature_list.h"
15 #include "base/metrics/crc32.h"
16 #include "base/metrics/field_trial.h"
17 #include "base/metrics/field_trial_params.h"
18 #include "base/metrics/histogram_functions.h"
19 #include "base/metrics/histogram_macros.h"
20 #include "base/metrics/metrics_hashes.h"
21 #include "base/rand_util.h"
22 #include "base/strings/string_number_conversions.h"
23 #include "base/strings/string_split.h"
24 #include "base/time/time.h"
25 #include "base/trace_event/typed_macros.h"
26 #include "components/ukm/scheme_constants.h"
27 #include "components/ukm/ukm_recorder_observer.h"
28 #include "components/variations/variations_associated_data.h"
29 #include "services/metrics/public/cpp/ukm_builders.h"
30 #include "services/metrics/public/cpp/ukm_decode.h"
31 #include "services/metrics/public/cpp/ukm_recorder.h"
32 #include "services/metrics/public/cpp/ukm_recorder_impl_utils.h"
33 #include "services/metrics/public/cpp/ukm_source.h"
34 #include "services/metrics/public/cpp/ukm_source_id.h"
35 #include "services/metrics/public/mojom/ukm_interface.mojom.h"
36 #include "third_party/metrics_proto/ukm/entry.pb.h"
37 #include "third_party/metrics_proto/ukm/report.pb.h"
38 #include "third_party/metrics_proto/ukm/source.pb.h"
39 #include "ukm_consent_state.h"
40 #include "ukm_recorder_impl.h"
41 #include "url/gurl.h"
42
43 namespace ukm {
44
// Feature named "UkmSamplingRate", disabled by default. In this file it is
// consulted by UkmRecorderImpl::IsSamplingConfigured().
45 BASE_FEATURE(kUkmSamplingRateFeature,
46              "UkmSamplingRate",
47              base::FEATURE_DISABLED_BY_DEFAULT);
48
49 namespace {
50
51 bool IsAllowlistedSourceId(SourceId source_id) {
52   SourceIdType type = GetSourceIdType(source_id);
53   switch (type) {
54     case ukm::SourceIdObj::Type::NAVIGATION_ID:
55     case ukm::SourceIdObj::Type::APP_ID:
56     case ukm::SourceIdObj::Type::HISTORY_ID:
57     case ukm::SourceIdObj::Type::WEBAPK_ID:
58     case ukm::SourceIdObj::Type::PAYMENT_APP_ID:
59     case ukm::SourceIdObj::Type::NO_URL_ID:
60     case ukm::SourceIdObj::Type::REDIRECT_ID:
61     case ukm::SourceIdObj::Type::WEB_IDENTITY_ID:
62     case ukm::SourceIdObj::Type::CHROMEOS_WEBSITE_ID:
63     case ukm::SourceIdObj::Type::EXTENSION_ID:
64     case ukm::SourceIdObj::Type::SOFT_NAVIGATION_ID: {
65       return true;
66     }
67     case ukm::SourceIdObj::Type::DEFAULT:
68     case ukm::SourceIdObj::Type::DESKTOP_WEB_APP_ID:
69     case ukm::SourceIdObj::Type::WORKER_ID:
70       return false;
71   }
72 }
73
74 bool IsAppIdType(SourceId source_id) {
75   SourceIdType type = GetSourceIdType(source_id);
76   return type == SourceIdType::APP_ID;
77 }
78
79 // Returns whether |url| has one of the schemes supported for logging to UKM.
80 // URLs with other schemes will not be logged.
81 bool HasSupportedScheme(const GURL& url) {
82   return url.SchemeIsHTTPOrHTTPS() || url.SchemeIs(url::kAboutScheme) ||
83          url.SchemeIs(kChromeUIScheme) || url.SchemeIs(kExtensionScheme) ||
84          url.SchemeIs(kAppScheme);
85 }
86
87 void RecordDroppedSource(DroppedDataReason reason) {
88   UMA_HISTOGRAM_ENUMERATION(
89       "UKM.Sources.Dropped", static_cast<int>(reason),
90       static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS));
91 }
92
93 void RecordDroppedSource(bool already_recorded_another_reason,
94                          DroppedDataReason reason) {
95   if (!already_recorded_another_reason)
96     RecordDroppedSource(reason);
97 }
98
99 void StoreEntryProto(const mojom::UkmEntry& in, Entry* out) {
100   DCHECK(!out->has_source_id());
101   DCHECK(!out->has_event_hash());
102
103   out->set_source_id(in.source_id);
104   out->set_event_hash(in.event_hash);
105   for (const auto& metric : in.metrics) {
106     Entry::Metric* proto_metric = out->add_metrics();
107     proto_metric->set_metric_hash(metric.first);
108     proto_metric->set_value(metric.second);
109   }
110 }
111
112 GURL SanitizeURL(const GURL& url) {
113   GURL::Replacements remove_params;
114   remove_params.ClearUsername();
115   remove_params.ClearPassword();
116   // chrome:// and about: URLs params are never used for navigation, only to
117   // prepopulate data on the page, so don't include their params.
118   if (url.SchemeIs(url::kAboutScheme) || url.SchemeIs("chrome")) {
119     remove_params.ClearQuery();
120   }
121   if (url.SchemeIs(kExtensionScheme)) {
122     remove_params.ClearPath();
123     remove_params.ClearQuery();
124     remove_params.ClearRef();
125   }
126   return url.ReplaceComponents(remove_params);
127 }
128
129 void AppendAllowlistedUrls(
130     const std::map<SourceId, std::unique_ptr<UkmSource>>& sources,
131     std::unordered_set<std::string>* urls) {
132   for (const auto& kv : sources) {
133     if (IsAllowlistedSourceId(kv.first)) {
134       urls->insert(kv.second->url().spec());
135       // Some non-navigation sources only record origin as a URL.
136       // Add the origin from the navigation source to match those too.
137       urls->insert(kv.second->url().DeprecatedGetOriginAsURL().spec());
138     }
139   }
140 }
141
142 // Returns true if the event corresponding to |event_hash| has a comprehensive
143 // decode map that includes all valid metrics.
144 bool HasComprehensiveDecodeMap(int64_t event_hash) {
145   // All events other than "Identifiability" conforms to its decode map.
146   // TODO(asanka): It is technically an abstraction violation for
147   // //components/ukm to know this fact.
148   return event_hash != builders::Identifiability::kEntryNameHash;
149 }
150
151 bool HasUnknownMetrics(const builders::DecodeMap& decode_map,
152                        const mojom::UkmEntry& entry) {
153   const auto it = decode_map.find(entry.event_hash);
154   if (it == decode_map.end())
155     return true;
156   if (!HasComprehensiveDecodeMap(entry.event_hash))
157     return false;
158   const auto& metric_map = it->second.metric_map;
159   for (const auto& metric : entry.metrics) {
160     if (metric_map.count(metric.first) == 0)
161       return true;
162   }
163   return false;
164 }
165
166 }  // namespace
167
168 UkmRecorderImpl::UkmRecorderImpl()
169     : sampling_seed_(static_cast<uint32_t>(base::RandUint64())) {
170   max_kept_sources_ =
171       static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
172           kUkmFeature, "MaxKeptSources", max_kept_sources_));
173 }
174
// Defaulted destructor, defined in this translation unit.
175 UkmRecorderImpl::~UkmRecorderImpl() = default;
176
// Defaulted default constructor, move assignment operator and destructor of
// Recordings, defined in this translation unit. Recordings::Reset() below
// assigns a freshly constructed Recordings through the move assignment.
177 UkmRecorderImpl::Recordings::Recordings() = default;
178 UkmRecorderImpl::Recordings& UkmRecorderImpl::Recordings::operator=(
179     Recordings&&) = default;
180 UkmRecorderImpl::Recordings::~Recordings() = default;
181
182 void UkmRecorderImpl::Recordings::Reset() {
183   *this = Recordings();
184 }
185
186 void UkmRecorderImpl::Recordings::SourceCounts::Reset() {
187   *this = SourceCounts();
188 }
189
190 void UkmRecorderImpl::UpdateRecording(ukm::UkmConsentState state) {
191   DVLOG(1) << "UkmRecorderImpl::UpdateRecording: " << state.ToEnumBitmask();
192   recording_state_ = state;
193   EnableRecording();
194 }
195
196 void UkmRecorderImpl::EnableRecording() {
197   recording_enabled_ = true;
198   OnRecorderParametersChanged();
199 }
200
201 void UkmRecorderImpl::DisableRecording() {
202   DVLOG(1) << "UkmRecorderImpl::DisableRecording";
203   if (recording_enabled())
204     recording_is_continuous_ = false;
205   recording_enabled_ = false;
206   OnRecorderParametersChanged();
207 }
208
209 void UkmRecorderImpl::SetSamplingForTesting(int rate) {
210   sampling_forced_for_testing_ = true;
211   default_sampling_rate_ = rate;
212   event_sampling_rates_.clear();
213 }
214
215 bool UkmRecorderImpl::ShouldDropEntryForTesting(mojom::UkmEntry* entry) {
216   return ShouldDropEntry(entry);
217 }
218
219 bool UkmRecorderImpl::IsSamplingConfigured() const {
220   return sampling_forced_for_testing_ ||
221          base::FeatureList::IsEnabled(kUkmSamplingRateFeature);
222 }
223
224 void UkmRecorderImpl::Purge() {
225   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
226   recordings_.Reset();
227   recording_is_continuous_ = false;
228
229   NotifyAllObservers(&UkmRecorderObserver::OnPurge);
230 }
231
232 void UkmRecorderImpl::PurgeRecordingsWithUrlScheme(
233     const std::string& url_scheme) {
234   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
235
236   // Discard all sources that have a URL with the given URL scheme as well as
237   // all the entries associated with these sources.
238   std::unordered_set<SourceId> relevant_source_ids;
239   for (const auto& kv : recordings_.sources) {
240     if (kv.second->url().SchemeIs(url_scheme)) {
241       relevant_source_ids.insert(kv.first);
242     }
243   }
244
245   PurgeSourcesAndEventsBySourceIds(relevant_source_ids);
246   recording_is_continuous_ = false;
247
248   NotifyAllObservers(&UkmRecorderObserver::OnPurgeRecordingsWithUrlScheme,
249                      url_scheme);
250 }
251
252 void UkmRecorderImpl::PurgeRecordingsWithSourceIdType(
253     ukm::SourceIdType source_id_type) {
254   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
255   std::unordered_set<SourceId> relevant_source_ids;
256
257   for (const auto& kv : recordings_.sources) {
258     if (GetSourceIdType(kv.first) == source_id_type) {
259       relevant_source_ids.insert(kv.first);
260     }
261   }
262
263   PurgeSourcesAndEventsBySourceIds(relevant_source_ids);
264   recording_is_continuous_ = false;
265 }
266
267 void UkmRecorderImpl::PurgeRecordingsWithMsbbSources() {
268   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
269   std::unordered_set<SourceId> relevant_source_ids;
270
271   for (const auto& kv : recordings_.sources) {
272     if (GetConsentType(GetSourceIdType(kv.first)) == MSBB) {
273       relevant_source_ids.insert(kv.first);
274     }
275   }
276
277   PurgeSourcesAndEventsBySourceIds(relevant_source_ids);
278   recording_is_continuous_ = false;
279 }
280
281 void UkmRecorderImpl::PurgeSourcesAndEventsBySourceIds(
282     const std::unordered_set<SourceId>& source_ids) {
283   for (const auto source_id : source_ids) {
284     recordings_.sources.erase(source_id);
285   }
286
287   std::vector<mojom::UkmEntryPtr>& events = recordings_.entries;
288
289   events.erase(std::remove_if(events.begin(), events.end(),
290                               [&](const auto& event) {
291                                 return source_ids.count(event->source_id);
292                               }),
293                events.end());
294 }
295
296 void UkmRecorderImpl::MarkSourceForDeletion(SourceId source_id) {
297   if (source_id == kInvalidSourceId)
298     return;
299   recordings_.obsolete_source_ids.insert(source_id);
300 }
301
302 void UkmRecorderImpl::SetIsWebstoreExtensionCallback(
303     const IsWebstoreExtensionCallback& callback) {
304   is_webstore_extension_callback_ = callback;
305 }
306
307 void UkmRecorderImpl::SetEntryFilter(
308     std::unique_ptr<UkmEntryFilter> entry_filter) {
309   DCHECK(!entry_filter_ || !entry_filter);
310   entry_filter_ = std::move(entry_filter);
311 }
312
313 void UkmRecorderImpl::AddUkmRecorderObserver(
314     const base::flat_set<uint64_t>& event_hashes,
315     UkmRecorderObserver* observer) {
316   DCHECK(observer);
317   {
318     base::AutoLock auto_lock(lock_);
319     if (!observers_.contains(event_hashes)) {
320       observers_.insert(
321           {event_hashes, base::MakeRefCounted<UkmRecorderObserverList>()});
322     }
323
324     observers_[event_hashes]->AddObserver(observer);
325   }
326   // Update the UkmRecorderParameters to capture a UKM event which is being
327   // observed by any UkmRecorderObserver in |observers_|.
328   OnRecorderParametersChanged();
329 }
330
331 void UkmRecorderImpl::RemoveUkmRecorderObserver(UkmRecorderObserver* observer) {
332   {
333     base::AutoLock auto_lock(lock_);
334     for (auto it = observers_.begin(); it != observers_.end();) {
335       if (it->second->RemoveObserver(observer) ==
336           UkmRecorderObserverList::RemoveObserverResult::kWasOrBecameEmpty) {
337         it = observers_.erase(it);
338       } else {
339         ++it;
340       }
341     }
342   }
343   OnRecorderParametersChanged();
344 }
345
346 void UkmRecorderImpl::OnUkmAllowedStateChanged(UkmConsentState state) {
347   NotifyAllObservers(&UkmRecorderObserver::OnUkmAllowedStateChanged, state);
348 }
349
350 void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
351   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
352
353   // Set of source ids seen by entries in recordings_.
354   std::set<SourceId> source_ids_seen;
355   for (const auto& entry : recordings_.entries) {
356     Entry* proto_entry = report->add_entries();
357     StoreEntryProto(*entry, proto_entry);
358     source_ids_seen.insert(entry->source_id);
359   }
360
361   // Number of sources excluded from this report because no entries referred to
362   // them.
363   const int num_sources_unsent =
364       recordings_.sources.size() - source_ids_seen.size();
365
366   // Construct set of allowlisted URLs by merging those carried over from the
367   // previous report cycle and those from sources recorded in this cycle.
368   std::unordered_set<std::string> url_allowlist;
369   recordings_.carryover_urls_allowlist.swap(url_allowlist);
370   AppendAllowlistedUrls(recordings_.sources, &url_allowlist);
371
372   // Number of sources discarded due to not matching a navigation URL.
373   int num_sources_unmatched = 0;
374
375   std::unordered_map<SourceIdType, int> serialized_source_type_counts;
376
377   for (const auto& kv : recordings_.sources) {
378     MaybeMarkForDeletion(kv.first);
379     // If the source id is not allowlisted, don't send it unless it has
380     // associated entries and the URL matches that of an allowlisted source.
381     if (!IsAllowlistedSourceId(kv.first)) {
382       // UkmSource should not keep initial_url for non-navigation source IDs.
383       DCHECK_EQ(1u, kv.second->urls().size());
384       if (!url_allowlist.count(kv.second->url().spec())) {
385         RecordDroppedSource(DroppedDataReason::NOT_MATCHED);
386         MarkSourceForDeletion(kv.first);
387         num_sources_unmatched++;
388         continue;
389       }
390       // Omit entryless sources from the report.
391       if (!base::Contains(source_ids_seen, kv.first)) {
392         continue;
393       }
394
395       // Non-allowlisted Source types will not be kept after entries are
396       // logged.
397       // We experimented with this in early 2023 and we found keeping sources
398       // longer didn't decrease the percentage of sources with null url. See
399       // crbug/1358334.
400       MarkSourceForDeletion(kv.first);
401     }
402     // Minimal validations before serializing into a proto message.
403     // See crbug/1274876.
404     DCHECK_NE(kv.second->id(), ukm::kInvalidSourceId);
405     DCHECK_NE(kv.second->urls().size(), 0u);
406     Source* proto_source = report->add_sources();
407     kv.second->PopulateProto(proto_source);
408
409     serialized_source_type_counts[GetSourceIdType(kv.first)]++;
410   }
411
412   for (const auto& event_and_aggregate : recordings_.event_aggregations) {
413     Aggregate* proto_aggregate = report->add_aggregates();
414     proto_aggregate->set_event_hash(event_and_aggregate.first);
415
416     const EventAggregate& event_aggregate = event_and_aggregate.second;
417     event_aggregate.FillProto(proto_aggregate);
418   }
419   int num_serialized_sources = 0;
420   for (const auto& source_type_and_count : serialized_source_type_counts) {
421     num_serialized_sources += source_type_and_count.second;
422   }
423
424   UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2",
425                             num_serialized_sources);
426   UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCount2",
427                               recordings_.entries.size());
428   UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount",
429                             num_sources_unsent);
430   UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnmatchedSourcesCount",
431                             num_sources_unmatched);
432
433   UMA_HISTOGRAM_COUNTS_1000(
434       "UKM.Sources.SerializedCount2.Default",
435       serialized_source_type_counts[SourceIdType::DEFAULT]);
436   UMA_HISTOGRAM_COUNTS_1000(
437       "UKM.Sources.SerializedCount2.Navigation",
438       serialized_source_type_counts[SourceIdType::NAVIGATION_ID]);
439   UMA_HISTOGRAM_COUNTS_1000(
440       "UKM.Sources.SerializedCount2.App",
441       serialized_source_type_counts[SourceIdType::APP_ID]);
442
443   // We record a UMA metric specifically for the number of serialized events
444   // with the FCP metric. This is for data quality verification.
445   const uint64_t pageload_hash =
446       base::HashMetricName(ukm::builders::PageLoad::kEntryName);
447   const uint64_t fcp_hash = base::HashMetricName(
448       ukm::builders::PageLoad::
449           kPaintTiming_NavigationToFirstContentfulPaintName);
450   int num_recorded_fcp = 0;
451   for (const auto& entry : recordings_.entries) {
452     if (entry->event_hash == pageload_hash) {
453       if (entry->metrics.find(fcp_hash) != entry->metrics.end()) {
454         num_recorded_fcp++;
455       }
456     }
457   }
458   UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCountFCP",
459                               num_recorded_fcp);
460
461   // For each matching id in obsolete_source_ids, remove the Source from
462   // recordings_.sources. The remaining sources form the deferred sources for
463   // the next report.
464   for (const SourceId& source_id : recordings_.obsolete_source_ids) {
465     recordings_.sources.erase(source_id);
466   }
467   recordings_.obsolete_source_ids.clear();
468
469   // Populate SourceCounts field on the report then clear the recordings.
470   Report::SourceCounts* source_counts_proto = report->mutable_source_counts();
471   source_counts_proto->set_observed(recordings_.source_counts.observed);
472   source_counts_proto->set_navigation_sources(
473       recordings_.source_counts.navigation_sources);
474   source_counts_proto->set_unmatched_sources(num_sources_unmatched);
475   source_counts_proto->set_carryover_sources(
476       recordings_.source_counts.carryover_sources);
477
478   recordings_.source_counts.Reset();
479   recordings_.entries.clear();
480   recordings_.event_aggregations.clear();
481
482   report->set_is_continuous(recording_is_continuous_);
483   recording_is_continuous_ = true;
484
485   int pruned_sources_age_sec = PruneData(source_ids_seen);
486
487   // Record how old the newest truncated source is.
488   source_counts_proto->set_pruned_sources_age_seconds(pruned_sources_age_sec);
489
490   // Set deferred sources count after pruning.
491   source_counts_proto->set_deferred_sources(recordings_.sources.size());
492   // Same value as the deferred source count, for setting the carryover count
493   // in the next reporting cycle.
494   recordings_.source_counts.carryover_sources = recordings_.sources.size();
495
496   // We already matched these deferred sources against the URL allowlist.
497   // Re-allowlist them for the next report.
498   for (const auto& kv : recordings_.sources) {
499     recordings_.carryover_urls_allowlist.insert(kv.second->url().spec());
500   }
501
502   UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount",
503                             recordings_.sources.size());
504
505   // Record number of sources after pruning that were carried over due to not
506   // having any events in this reporting cycle.
507   int num_sources_entryless = 0;
508   for (const auto& kv : recordings_.sources) {
509     if (!base::Contains(source_ids_seen, kv.first)) {
510       num_sources_entryless++;
511     }
512   }
513   source_counts_proto->set_entryless_sources(num_sources_entryless);
514
515   // Notify observers that a report was generated.
516   if (entry_filter_) {
517     entry_filter_->OnStoreRecordingsInReport();
518   }
519 }
520
521 int UkmRecorderImpl::PruneData(std::set<SourceId>& source_ids_seen) {
522   // Modify the set source_ids_seen by removing sources that aren't in
523   // recordings_. We do this here as there is a few places for
524   // recordings_.sources to be modified. The resulting set will be currently
525   // existing sources that were seen in this report.
526   auto it = source_ids_seen.begin();
527   while (it != source_ids_seen.end()) {
528     if (!base::Contains(recordings_.sources, *it)) {
529       it = source_ids_seen.erase(it);
530     } else {
531       it++;
532     }
533   }
534
535   // Build the set of sources that exist in recordings_.sources that were not
536   // seen in this report.
537   std::set<SourceId> source_ids_unseen;
538   for (const auto& kv : recordings_.sources) {
539     if (!base::Contains(source_ids_seen, kv.first)) {
540       source_ids_unseen.insert(kv.first);
541     }
542   }
543
544   // Special case APP_IDs. Ideally this is not going to exist for too long, as
545   // it would be preferable to have a more general purpose solution.
546   std::set<SourceId> source_ids_app_id;
547
548   // Only done if we are in the experiment that will leave APP_ID metrics for
549   // last when pruning. This block extracts out all source_ids from the
550   // seen/unseen lists and stores them in |source_ids_app_id|.
551   if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
552                                               false)) {
553     it = source_ids_seen.begin();
554     while (it != source_ids_seen.end()) {
555       if (IsAppIdType(*it)) {
556         source_ids_app_id.insert(*it);
557         it = source_ids_seen.erase(it);
558       } else {
559         it++;
560       }
561     }
562
563     it = source_ids_unseen.begin();
564     while (it != source_ids_unseen.end()) {
565       if (IsAppIdType(*it)) {
566         source_ids_app_id.insert(*it);
567         it = source_ids_unseen.erase(it);
568       } else {
569         it++;
570       }
571     }
572   }
573
574   int pruned_sources_age_sec = 0;
575   int num_sources = recordings_.sources.size();
576   // Setup an experiment to test what will occur if we prune unseen sources
577   // first.
578   if (base::GetFieldTrialParamByFeatureAsBool(
579           kUkmFeature, "PruneUnseenSourcesFirst", false)) {
580     int pruned_sources_age_from_unseen_sec =
581         PruneOldSources(max_kept_sources_, source_ids_unseen);
582
583     UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumUnseen",
584                                num_sources - recordings_.sources.size());
585     num_sources = recordings_.sources.size();
586
587     // Prune again from seen sources. Note that if we've already pruned enough
588     // from the unseen sources, this will be a noop.
589     int pruned_sources_age_from_seen_sec =
590         PruneOldSources(max_kept_sources_, source_ids_seen);
591
592     UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumSeen",
593                                num_sources - recordings_.sources.size());
594     num_sources = recordings_.sources.size();
595
596     int pruned_sources_age_from_app_id_sec = 0;
597
598     // Technically this should be fine without the feature, since the group
599     // will be empty, but might as well add the feature check.
600     // Still prune the APP_ID entries. We don't want it to be unbounded, but
601     // providing a higher default here in case.
602     if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
603                                                 false)) {
604       pruned_sources_age_from_app_id_sec =
605           PruneOldSources(500, source_ids_app_id);
606
607       UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumAppId",
608                                  num_sources - recordings_.sources.size());
609     }
610
611     // We're looking for the newest age, which will be the largest between the
612     // two sets we pruned from.
613     pruned_sources_age_sec = std::max({pruned_sources_age_from_unseen_sec,
614                                        pruned_sources_age_from_seen_sec,
615                                        pruned_sources_age_from_app_id_sec});
616
617   } else {
618     // In this case, we prune all sources without caring if they were seen or
619     // not. Make a set of all existing sources so we can use the same
620     // PruneOldSources method.
621     std::set<SourceId> all_sources;
622     for (const auto& kv : recordings_.sources) {
623       all_sources.insert(kv.first);
624     }
625     if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
626                                                 false)) {
627       std::set<SourceId> all_sources_without_app_id;
628
629       // This will put into |all_sources_without_app_id| the set of
630       // |all_sources| - |source_ids_app_id|.
631       std::set_difference(all_sources.begin(), all_sources.end(),
632                           source_ids_app_id.begin(), source_ids_app_id.end(),
633                           std::inserter(all_sources_without_app_id,
634                                         all_sources_without_app_id.end()));
635
636       // Now, prune the non-APP_ID, then the APP_ID.
637       int pruned_sources_age_sec_non_app_id =
638           PruneOldSources(max_kept_sources_, all_sources_without_app_id);
639
640       UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.AppExpNumNonAppId",
641                                  num_sources - recordings_.sources.size());
642       num_sources = recordings_.sources.size();
643
644       int pruned_sources_age_sec_app_id =
645           PruneOldSources(500, source_ids_app_id);
646
647       UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.AppExpNumAppId",
648                                  num_sources - recordings_.sources.size());
649
650       pruned_sources_age_sec = std::max(pruned_sources_age_sec_non_app_id,
651                                         pruned_sources_age_sec_app_id);
652
653     } else {
654       pruned_sources_age_sec = PruneOldSources(max_kept_sources_, all_sources);
655       UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NoExp",
656                                  num_sources - recordings_.sources.size());
657     }
658   }
659   return pruned_sources_age_sec;
660 }
661
662 bool UkmRecorderImpl::ShouldDropEntry(mojom::UkmEntry* entry) {
663   if (!recording_enabled()) {
664     RecordDroppedEntry(entry->event_hash,
665                        DroppedDataReason::RECORDING_DISABLED);
666     return true;
667   }
668
669   const auto required_consent =
670       GetConsentType(GetSourceIdType(entry->source_id));
671
672   if (!recording_enabled(required_consent)) {
673     if (required_consent == UkmConsentType::MSBB) {
674       RecordDroppedEntry(entry->event_hash,
675                          DroppedDataReason::MSBB_CONSENT_DISABLED);
676
677     } else {
678       RecordDroppedEntry(entry->event_hash,
679                          DroppedDataReason::APPS_CONSENT_DISABLED);
680     }
681     return true;
682   }
683
684   if (!ApplyEntryFilter(entry)) {
685     RecordDroppedEntry(entry->event_hash,
686                        DroppedDataReason::REJECTED_BY_FILTER);
687     return true;
688   }
689
690   return false;
691 }
692
693 bool UkmRecorderImpl::ApplyEntryFilter(mojom::UkmEntry* entry) {
694   base::flat_set<uint64_t> dropped_metric_hashes;
695
696   if (!entry_filter_)
697     return true;
698
699   bool keep_entry = entry_filter_->FilterEntry(entry, &dropped_metric_hashes);
700
701   for (auto metric : dropped_metric_hashes) {
702     recordings_.event_aggregations[entry->event_hash]
703         .metrics[metric]
704         .dropped_due_to_filter++;
705   }
706
707   if (!keep_entry) {
708     recordings_.event_aggregations[entry->event_hash].dropped_due_to_filter++;
709     return false;
710   }
711   return true;
712 }
713
714 int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources,
715                                      const std::set<SourceId>& pruning_set) {
716   long num_prune_required = recordings_.sources.size() - max_kept_sources;
717   // In either case here, nothing to be done.
718   if (num_prune_required <= 0 || pruning_set.size() == 0)
719     return 0;
720
721   // We can prune everything, so let's do that directly.
722   if (static_cast<unsigned long>(num_prune_required) >= pruning_set.size()) {
723     base::TimeTicks pruned_sources_age = base::TimeTicks();
724     for (const auto& source_id : pruning_set) {
725       auto creation_time = recordings_.sources[source_id]->creation_time();
726       if (creation_time > pruned_sources_age)
727         pruned_sources_age = creation_time;
728
729       recordings_.sources.erase(source_id);
730     }
731     base::TimeDelta age_delta = base::TimeTicks::Now() - pruned_sources_age;
732     // Technically the age we return here isn't quite right, this is the age of
733     // the newest element of the pruned set, while we actually want the age of
734     // the last one kept. However it's very unlikely to make a difference in
735     // practice as if all are pruned here, it is very likely we'll need to prune
736     // from the seen set next. Since it would be logically quite a bit more
737     // complex to get this exactly right, it's ok for this to be very slightly
738     // off in an edge case just to keep complexity down.
739     return age_delta.InSeconds();
740   }
741
742   // In this case we cannot prune everything, so we will select only the oldest
743   // sources to prune.
744
745   // Build a list of timestamp->source pairs for all source we consider for
746   // pruning.
747   std::vector<std::pair<base::TimeTicks, SourceId>> timestamp_source_id_pairs;
748   for (const auto& source_id : pruning_set) {
749     auto creation_time = recordings_.sources[source_id]->creation_time();
750     timestamp_source_id_pairs.emplace_back(
751         std::make_pair(creation_time, source_id));
752   }
753
754   // Partially sort so that the last |num_prune_required| elements are the
755   // newest.
756   std::nth_element(timestamp_source_id_pairs.begin(),
757                    timestamp_source_id_pairs.end() - num_prune_required,
758                    timestamp_source_id_pairs.end());
759
760   // Actually prune |num_prune_required| sources.
761   for (int i = 0; i < num_prune_required; i++) {
762     auto source_id = timestamp_source_id_pairs[i].second;
763     recordings_.sources.erase(source_id);
764   }
765
766   base::TimeDelta pruned_sources_age =
767       base::TimeTicks::Now() -
768       (timestamp_source_id_pairs.end() - (num_prune_required + 1))->first;
769
770   return pruned_sources_age.InSeconds();
771 }
772
773 void UkmRecorderImpl::UpdateSourceURL(SourceId source_id,
774                                       const GURL& unsanitized_url) {
775   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
776   DCHECK(GetSourceIdType(source_id) != SourceIdType::NO_URL_ID);
777
778   if (base::Contains(recordings_.sources, source_id))
779     return;
780
781   const GURL sanitized_url = SanitizeURL(unsanitized_url);
782   if (ShouldRecordUrl(source_id, sanitized_url) ==
783       ShouldRecordUrlResult::kDropped) {
784     return;
785   }
786   RecordSource(std::make_unique<UkmSource>(source_id, sanitized_url));
787 }
788
789 void UkmRecorderImpl::UpdateAppURL(SourceId source_id,
790                                    const GURL& url,
791                                    const AppType app_type) {
792   if (app_type != AppType::kPWA && !recording_enabled(ukm::EXTENSIONS)) {
793     RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED);
794     return;
795   }
796   UpdateSourceURL(source_id, url);
797 }
798
799 void UkmRecorderImpl::RecordNavigation(
800     SourceId source_id,
801     const UkmSource::NavigationData& unsanitized_navigation_data) {
802   DCHECK(GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID);
803   DCHECK(!base::Contains(recordings_.sources, source_id));
804   // TODO(csharrison): Consider changing this behavior so the Source isn't even
805   // recorded at all if the final URL in |unsanitized_navigation_data| should
806   // not be recorded.
807   std::vector<GURL> urls;
808   for (const GURL& url : unsanitized_navigation_data.urls) {
809     const GURL sanitized_url = SanitizeURL(url);
810     if (ShouldRecordUrl(source_id, sanitized_url) !=
811         ShouldRecordUrlResult::kDropped) {
812       urls.push_back(std::move(sanitized_url));
813     }
814   }
815
816   // None of the URLs passed the ShouldRecordUrl check, so do not create a new
817   // Source for them.
818   if (urls.empty())
819     return;
820
821   UkmSource::NavigationData sanitized_navigation_data =
822       unsanitized_navigation_data.CopyWithSanitizedUrls(urls);
823   RecordSource(
824       std::make_unique<UkmSource>(source_id, sanitized_navigation_data));
825 }
826
827 // static:
828 UkmConsentType UkmRecorderImpl::GetConsentType(SourceIdType type) {
829   switch (type) {
830     case SourceIdType::APP_ID:
831       return UkmConsentType::APPS;
832     case SourceIdType::DEFAULT:
833     case SourceIdType::NAVIGATION_ID:
834     case SourceIdType::HISTORY_ID:
835     case SourceIdType::WEBAPK_ID:
836     case SourceIdType::PAYMENT_APP_ID:
837     case SourceIdType::DESKTOP_WEB_APP_ID:
838     case SourceIdType::WORKER_ID:
839     case SourceIdType::NO_URL_ID:
840     case SourceIdType::REDIRECT_ID:
841     case SourceIdType::WEB_IDENTITY_ID:
842     case SourceIdType::CHROMEOS_WEBSITE_ID:
843     case SourceIdType::EXTENSION_ID:
844     case SourceIdType::SOFT_NAVIGATION_ID:
845       return UkmConsentType::MSBB;
846   }
847   return UkmConsentType::MSBB;
848 }
849
// EventAggregate's constructor and destructor are defaulted here, out of line
// in this translation unit.
UkmRecorderImpl::EventAggregate::EventAggregate() = default;
UkmRecorderImpl::EventAggregate::~EventAggregate() = default;
852
853 void UkmRecorderImpl::EventAggregate::FillProto(
854     Aggregate* proto_aggregate) const {
855   proto_aggregate->set_source_id(0);  // Across all sources.
856   proto_aggregate->set_total_count(total_count);
857   proto_aggregate->set_dropped_due_to_limits(dropped_due_to_limits);
858   proto_aggregate->set_dropped_due_to_sampling(dropped_due_to_sampling);
859   proto_aggregate->set_dropped_due_to_filter(dropped_due_to_filter);
860   proto_aggregate->set_dropped_due_to_unconfigured(dropped_due_to_unconfigured);
861   for (const auto& metric_and_aggregate : metrics) {
862     const MetricAggregate& aggregate = metric_and_aggregate.second;
863     Aggregate::Metric* proto_metric = proto_aggregate->add_metrics();
864     proto_metric->set_metric_hash(metric_and_aggregate.first);
865     proto_metric->set_value_sum(aggregate.value_sum);
866     proto_metric->set_value_square_sum(aggregate.value_square_sum);
867     if (aggregate.total_count != total_count) {
868       proto_metric->set_total_count(aggregate.total_count);
869     }
870     if (aggregate.dropped_due_to_limits != dropped_due_to_limits) {
871       proto_metric->set_dropped_due_to_limits(aggregate.dropped_due_to_limits);
872     }
873     if (aggregate.dropped_due_to_sampling != dropped_due_to_sampling) {
874       proto_metric->set_dropped_due_to_sampling(
875           aggregate.dropped_due_to_sampling);
876     }
877     if (aggregate.dropped_due_to_filter != dropped_due_to_filter) {
878       proto_metric->set_dropped_due_to_filter(aggregate.dropped_due_to_filter);
879     }
880     if (aggregate.dropped_due_to_unconfigured != dropped_due_to_unconfigured) {
881       proto_metric->set_dropped_due_to_unconfigured(
882           aggregate.dropped_due_to_unconfigured);
883     }
884   }
885 }
886
887 void UkmRecorderImpl::MaybeMarkForDeletion(SourceId source_id) {
888   SourceIdType type = GetSourceIdType(source_id);
889   switch (type) {
890     case ukm::SourceIdObj::Type::HISTORY_ID:
891     case ukm::SourceIdObj::Type::WEBAPK_ID:
892     case ukm::SourceIdObj::Type::PAYMENT_APP_ID:
893     case ukm::SourceIdObj::Type::NO_URL_ID:
894     case ukm::SourceIdObj::Type::WEB_IDENTITY_ID:
895     case ukm::SourceIdObj::Type::CHROMEOS_WEBSITE_ID:
896     case ukm::SourceIdObj::Type::EXTENSION_ID: {
897       // Don't keep sources of these types after current report because their
898       // entries are logged only at source creation time.
899       MarkSourceForDeletion(source_id);
900       break;
901     }
902     case ukm::SourceIdObj::Type::DEFAULT:
903     case ukm::SourceIdObj::Type::APP_ID:
904     case ukm::SourceIdObj::Type::DESKTOP_WEB_APP_ID:
905     case ukm::SourceIdObj::Type::NAVIGATION_ID:
906     case ukm::SourceIdObj::Type::WORKER_ID:
907     case ukm::SourceIdObj::Type::REDIRECT_ID:
908     case ukm::SourceIdObj::Type::SOFT_NAVIGATION_ID:
909       break;
910   }
911 }
912
913 // Extension URLs need to be specifically enabled and the extension synced.
914 bool UkmRecorderImpl::ShouldDropExtensionUrl(
915     const GURL& sanitized_extension_url,
916     bool has_recorded_reason) const {
917   DCHECK_EQ(sanitized_extension_url.GetWithEmptyPath(),
918             sanitized_extension_url);
919
920   // If the URL scheme is not extension scheme, drop the record with
921   // `EXTENSION_URL_INVALID`.
922   if (!sanitized_extension_url.SchemeIs(kExtensionScheme)) {
923     RecordDroppedSource(has_recorded_reason,
924                         DroppedDataReason::EXTENSION_URL_INVALID);
925     return true;
926   }
927   // If the recording is not enabled for extensions, drop the record with
928   // `EXTENSION_URLS_DISABLED`.
929   if (!recording_enabled(ukm::EXTENSIONS)) {
930     RecordDroppedSource(has_recorded_reason,
931                         DroppedDataReason::EXTENSION_URLS_DISABLED);
932     return true;
933   }
934   // If the extension is not a webstore extension, drop the record with
935   // `EXTENSION_NOT_SYNCED`.
936   if (!is_webstore_extension_callback_ ||
937       !is_webstore_extension_callback_.Run(
938           sanitized_extension_url.host_piece())) {
939     RecordDroppedSource(has_recorded_reason,
940                         DroppedDataReason::EXTENSION_NOT_SYNCED);
941     return true;
942   }
943
944   return false;
945 }
946
947 UkmRecorderImpl::ShouldRecordUrlResult UkmRecorderImpl::ShouldRecordUrl(
948     SourceId source_id,
949     const GURL& sanitized_url) const {
950   ShouldRecordUrlResult result = ShouldRecordUrlResult::kOk;
951   bool has_recorded_reason = false;
952   if (!recording_enabled()) {
953     RecordDroppedSource(DroppedDataReason::RECORDING_DISABLED);
954     // Don't return the result yet. Check if the we are allowed to notify
955     // observers, as they may rely on the not uploaded metrics to determine
956     // how some features should work.
957     result = ShouldRecordUrlResult::kObserverOnly;
958     has_recorded_reason = true;
959   }
960
961   const auto required_consent = GetConsentType(GetSourceIdType(source_id));
962
963   if (!recording_enabled(required_consent)) {
964     if (required_consent == UkmConsentType::MSBB) {
965       RecordDroppedSource(has_recorded_reason,
966                           DroppedDataReason::MSBB_CONSENT_DISABLED);
967
968     } else {
969       RecordDroppedSource(has_recorded_reason,
970                           DroppedDataReason::APPS_CONSENT_DISABLED);
971     }
972     return ShouldRecordUrlResult::kDropped;
973   }
974
975   if (recordings_.sources.size() >= max_sources_) {
976     RecordDroppedSource(has_recorded_reason, DroppedDataReason::MAX_HIT);
977     return ShouldRecordUrlResult::kDropped;
978   }
979
980   if (sanitized_url.is_empty()) {
981     RecordDroppedSource(has_recorded_reason, DroppedDataReason::EMPTY_URL);
982     return ShouldRecordUrlResult::kDropped;
983   }
984
985   if (!HasSupportedScheme(sanitized_url)) {
986     RecordDroppedSource(has_recorded_reason,
987                         DroppedDataReason::UNSUPPORTED_URL_SCHEME);
988     DVLOG(2) << "Dropped Unsupported UKM URL:" << source_id << ":"
989              << sanitized_url.spec();
990     return ShouldRecordUrlResult::kDropped;
991   }
992
993   if (GetSourceIdType(source_id) == SourceIdType::EXTENSION_ID) {
994     if (ShouldDropExtensionUrl(sanitized_url, has_recorded_reason)) {
995       return ShouldRecordUrlResult::kDropped;
996     }
997   }
998
999   // Ideally, this check should be covered by the above block for
1000   // `EXTENSION_ID` type. For backward compatibility we still keep it here so
1001   // the UKMs recorded without `EXTENSION_ID` type are also properly checked.
1002   // TODO(https://crbug.com/1393445): clean up all the UKM metrics with
1003   // extension URL to use the dedicated source ID type, and remove this check.
1004   if (sanitized_url.SchemeIs(kExtensionScheme)) {
1005     if (ShouldDropExtensionUrl(sanitized_url, has_recorded_reason)) {
1006       return ShouldRecordUrlResult::kDropped;
1007     }
1008   }
1009   return result;
1010 }
1011
1012 void UkmRecorderImpl::RecordSource(std::unique_ptr<UkmSource> source) {
1013   SourceId source_id = source->id();
1014   // If UKM recording is disabled due to |recording_enabled|,
1015   // still notify observers as they might be interested in it.
1016   NotifyAllObservers(&UkmRecorderObserver::OnUpdateSourceURL, source_id,
1017                      source->urls());
1018
1019   if (!recording_enabled()) {
1020     return;
1021   }
1022
1023   const auto required_consent = GetConsentType(GetSourceIdType(source_id));
1024
1025   if (!recording_enabled(required_consent)) {
1026     return;
1027   }
1028
1029   if (GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID)
1030     recordings_.source_counts.navigation_sources++;
1031   recordings_.source_counts.observed++;
1032   recordings_.sources.emplace(source_id, std::move(source));
1033 }
1034
1035 void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
1036   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
1037   DCHECK(!HasUnknownMetrics(decode_map_, *entry));
1038
1039   NotifyObserversWithNewEntry(*entry);
1040
1041   if (ShouldDropEntry(entry.get()))
1042     return;
1043
1044   EventAggregate& event_aggregate =
1045       recordings_.event_aggregations[entry->event_hash];
1046   event_aggregate.total_count++;
1047   for (const auto& metric : entry->metrics) {
1048     MetricAggregate& aggregate = event_aggregate.metrics[metric.first];
1049     double value = metric.second;
1050     aggregate.total_count++;
1051     aggregate.value_sum += value;
1052     aggregate.value_square_sum += value * value;
1053   }
1054
1055   if (!IsSamplingConfigured()) {
1056     RecordDroppedEntry(entry->event_hash,
1057                        DroppedDataReason::SAMPLING_UNCONFIGURED);
1058     event_aggregate.dropped_due_to_unconfigured++;
1059     for (auto& metric : entry->metrics)
1060       event_aggregate.metrics[metric.first].dropped_due_to_unconfigured++;
1061     return;
1062   }
1063
1064   if (default_sampling_rate_ < 0) {
1065     LoadExperimentSamplingInfo();
1066   }
1067
1068   bool sampled_in = IsSampledIn(entry->source_id, entry->event_hash);
1069
1070   if (!sampled_in) {
1071     RecordDroppedEntry(entry->event_hash, DroppedDataReason::SAMPLED_OUT);
1072     event_aggregate.dropped_due_to_sampling++;
1073     for (auto& metric : entry->metrics)
1074       event_aggregate.metrics[metric.first].dropped_due_to_sampling++;
1075     return;
1076   }
1077
1078   if (recordings_.entries.size() >= max_entries_) {
1079     RecordDroppedEntry(entry->event_hash, DroppedDataReason::MAX_HIT);
1080     event_aggregate.dropped_due_to_limits++;
1081     for (auto& metric : entry->metrics)
1082       event_aggregate.metrics[metric.first].dropped_due_to_limits++;
1083     return;
1084   }
1085
1086   // Log a corresponding entry to UMA so we get a per-metric breakdown of UKM
1087   // entry counts.
1088   // Truncate the unsigned 64-bit hash to 31 bits, to
1089   // make it a suitable histogram sample.
1090   UMA_HISTOGRAM_SPARSE("UKM.Entries.Recorded.ByEntryHash",
1091                        entry->event_hash & 0x7fffffff);
1092
1093   recordings_.entries.push_back(std::move(entry));
1094 }
1095
1096 void UkmRecorderImpl::LoadExperimentSamplingInfo() {
1097   // This should be called only if a sampling rate hasn't been loaded.
1098   DCHECK_LT(default_sampling_rate_, 0);
1099
1100   // Default rate must be >= 0 to indicate that load is complete.
1101   default_sampling_rate_ = 1;
1102
1103   // If we don't have the feature, no parameters to load.
1104   if (!base::FeatureList::IsEnabled(kUkmSamplingRateFeature)) {
1105     return;
1106   }
1107
1108   // Check the parameters for sampling controls.
1109   std::map<std::string, std::string> params;
1110   if (base::GetFieldTrialParamsByFeature(kUkmSamplingRateFeature, &params)) {
1111     LoadExperimentSamplingParams(params);
1112   }
1113 }
1114
1115 void UkmRecorderImpl::LoadExperimentSamplingParams(
1116     const std::map<std::string, std::string>& params) {
1117   for (const auto& kv : params) {
1118     const std::string& key = kv.first;
1119     if (key.length() == 0)
1120       continue;
1121
1122     // Keys starting with an underscore are global configuration.
1123     if (key.at(0) == '_') {
1124       if (key == "_default_sampling") {
1125         int sampling;
1126         // We only load non-negative global sampling rates.
1127         if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
1128           default_sampling_rate_ = sampling;
1129       }
1130       continue;
1131     }
1132
1133     // Anything else is an event name.
1134     int sampling;
1135     auto hash = base::HashMetricName(key);
1136     if (base::StringToInt(kv.second, &sampling)) {
1137       // If the parameter is a number then that's the sampling rate.
1138       if (sampling >= 0)
1139         event_sampling_rates_[hash] = sampling;
1140     } else {
1141       // If the parameter is a string then it's the name of another metric
1142       // to which it should be slaved. This allows different metrics to be
1143       // sampled in or out together.
1144       event_sampling_master_[hash] = base::HashMetricName(kv.second);
1145     }
1146   }
1147 }
1148
1149 bool UkmRecorderImpl::IsSampledIn(int64_t source_id, uint64_t event_id) {
1150   // Determine the sampling rate. It's one of:
1151   // - the default
1152   // - an explicit sampling rate
1153   // - a group sampling rate
1154   int sampling_rate = default_sampling_rate_;
1155   uint64_t sampling_hash = event_id;
1156   auto master_found = event_sampling_master_.find(sampling_hash);
1157   if (master_found != event_sampling_master_.end()) {
1158     sampling_hash = master_found->second;
1159   }
1160   auto rate_found = event_sampling_rates_.find(sampling_hash);
1161   if (rate_found != event_sampling_rates_.end()) {
1162     sampling_rate = rate_found->second;
1163   }
1164
1165   return IsSampledIn(source_id, sampling_hash, sampling_rate);
1166 }
1167
1168 bool UkmRecorderImpl::IsSampledIn(int64_t source_id,
1169                                   uint64_t event_id,
1170                                   int sampling_rate) {
1171   // A sampling rate of 0 is "never"; everything else is 1-in-N but calculated
1172   // deterministically based on a seed, the source-id, and the event-id. Skip
1173   // the calculation, though, if N==1 because it will always be true. A negative
1174   // rate means "unset"; treat it like "never".
1175   if (sampling_rate <= 0)
1176     return false;
1177   if (sampling_rate == 1)
1178     return true;
1179
1180   // Mutate the "sampling seed" number in a predictable manner based on the
1181   // source and event IDs. This makes the result of this function be always
1182   // the same for the same input parameters (since the seed is fixed during
1183   // construction of this object) which is important for proper sampling
1184   // behavior. CRC32 is fast and statistically random enough for these
1185   // purposes.
1186   uint32_t sampled_num = sampling_seed_;
1187   sampled_num = base::Crc32(sampled_num, &source_id, sizeof(source_id));
1188   sampled_num = base::Crc32(sampled_num, &event_id, sizeof(event_id));
1189
1190   return sampled_num % sampling_rate == 0;
1191 }
1192
// Fills |decode_map_| with the map returned by builders::CreateDecodeMap().
// Must be called on the sequence checked by |sequence_checker_|.
void UkmRecorderImpl::InitDecodeMap() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  decode_map_ = builders::CreateDecodeMap();
}
1197
1198 void UkmRecorderImpl::NotifyObserversWithNewEntry(
1199     const mojom::UkmEntry& entry) {
1200   TRACE_EVENT("toplevel", "UkmRecorderImpl::NotifyObserversWithNewEntry");
1201
1202   base::AutoLock auto_lock(lock_);
1203
1204   for (const auto& observer : observers_) {
1205     if (observer.first.contains(entry.event_hash)) {
1206       TRACE_EVENT(
1207           "toplevel",
1208           "UkmRecorderImpl::NotifyObserversWithNewEntry NotifyObserver");
1209       mojom::UkmEntryPtr cloned = entry.Clone();
1210       observer.second->Notify(FROM_HERE, &UkmRecorderObserver::OnEntryAdded,
1211                               base::Passed(&cloned));
1212     }
1213   }
1214 }
1215
1216 template <typename Method, typename... Params>
1217 void UkmRecorderImpl::NotifyAllObservers(Method m, Params&&... params) {
1218   base::AutoLock auto_lock(lock_);
1219   for (const auto& observer : observers_) {
1220     observer.second->Notify(FROM_HERE, m, std::forward<Params>(params)...);
1221   }
1222 }
1223
1224 std::set<uint64_t> UkmRecorderImpl::GetObservedEventHashes() {
1225   base::AutoLock lock(lock_);
1226   std::set<uint64_t> hashes;
1227   for (const auto& observer : observers_) {
1228     hashes.insert(observer.first.begin(), observer.first.end());
1229   }
1230   return hashes;
1231 }
1232
1233 }  // namespace ukm