[M73 Dev][EFL] Disable VizDisplayCompositor for EFL port
[platform/framework/web/chromium-efl.git] / components / ukm / ukm_recorder_impl.cc
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/ukm/ukm_recorder_impl.h"
6
7 #include <limits>
8 #include <memory>
9 #include <string>
10 #include <utility>
11
12 #include "base/feature_list.h"
13 #include "base/metrics/field_trial.h"
14 #include "base/metrics/field_trial_params.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/metrics/metrics_hashes.h"
17 #include "base/rand_util.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/string_split.h"
20 #include "components/variations/variations_associated_data.h"
21 #include "services/metrics/public/cpp/ukm_decode.h"
22 #include "services/metrics/public/cpp/ukm_source.h"
23 #include "services/metrics/public/cpp/ukm_source_id.h"
24 #include "third_party/metrics_proto/ukm/entry.pb.h"
25 #include "third_party/metrics_proto/ukm/report.pb.h"
26 #include "third_party/metrics_proto/ukm/source.pb.h"
27 #include "url/gurl.h"
28
29 namespace ukm {
30
31 namespace {
32
33 // Note: kChromeUIScheme is defined in content, which this code can't
34 // depend on - since it's used by iOS too. kExtensionScheme is defined
35 // in extensions which also isn't always available here. kAppScheme
36 // will be defined in code that isn't available here.
37 const char kChromeUIScheme[] = "chrome";
38 const char kExtensionScheme[] = "chrome-extension";
39 const char kAppScheme[] = "app";
40
41 const base::Feature kUkmSamplingRateFeature{"UkmSamplingRate",
42                                             base::FEATURE_DISABLED_BY_DEFAULT};
43
44 // Gets the list of whitelisted Entries as string. Format is a comma separated
45 // list of Entry names (as strings).
46 std::string GetWhitelistEntries() {
47   return base::GetFieldTrialParamValueByFeature(kUkmFeature,
48                                                 "WhitelistEntries");
49 }
50
51 bool IsWhitelistedSourceId(SourceId source_id) {
52   return GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID ||
53          GetSourceIdType(source_id) == SourceIdType::APP_ID;
54 }
55
56 // Gets the maximum number of Sources we'll keep in memory before discarding any
57 // new ones being added.
58 size_t GetMaxSources() {
59   constexpr size_t kDefaultMaxSources = 500;
60   return static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
61       kUkmFeature, "MaxSources", kDefaultMaxSources));
62 }
63
64 // Gets the maximum number of unreferenced Sources kept after purging sources
65 // that were added to the log.
66 size_t GetMaxKeptSources() {
67   constexpr size_t kDefaultMaxKeptSources = 100;
68   return static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
69       kUkmFeature, "MaxKeptSources", kDefaultMaxKeptSources));
70 }
71
72 // Gets the maximum number of Entries we'll keep in memory before discarding any
73 // new ones being added.
74 size_t GetMaxEntries() {
75   constexpr size_t kDefaultMaxEntries = 5000;
76   return static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
77       kUkmFeature, "MaxEntries", kDefaultMaxEntries));
78 }
79
80 // Returns whether |url| has one of the schemes supported for logging to UKM.
81 // URLs with other schemes will not be logged.
82 bool HasSupportedScheme(const GURL& url) {
83   return url.SchemeIsHTTPOrHTTPS() || url.SchemeIs(url::kFtpScheme) ||
84          url.SchemeIs(url::kAboutScheme) || url.SchemeIs(kChromeUIScheme) ||
85          url.SchemeIs(kExtensionScheme) || url.SchemeIs(kAppScheme);
86 }
87
88 // True if we should record the initial_url field of the UKM Source proto.
89 bool ShouldRecordInitialUrl() {
90   return base::GetFieldTrialParamByFeatureAsBool(kUkmFeature,
91                                                  "RecordInitialUrl", false);
92 }
93
// Reasons for which a Source or an Entry is discarded. The numeric values are
// reported as samples of the "UKM.Sources.Dropped" and "UKM.Entries.Dropped"
// histograms.
// NOTE(review): as histogram samples these values presumably must stay stable
// (append only, never renumber) - confirm against the histogram definition.
enum class DroppedDataReason {
  // Not reported anywhere in this file.
  NOT_DROPPED = 0,
  // Recording was not enabled when the data arrived.
  RECORDING_DISABLED = 1,
  // The in-memory limit (GetMaxSources() / GetMaxEntries()) was reached.
  MAX_HIT = 2,
  // The source id type or the entry's event hash is not whitelisted.
  NOT_WHITELISTED = 3,
  // The URL scheme is rejected by HasSupportedScheme().
  UNSUPPORTED_URL_SCHEME = 4,
  // The entry was sampled out.
  SAMPLED_OUT = 5,
  // Extension/app URL received while extension recording is disabled.
  EXTENSION_URLS_DISABLED = 6,
  // The webstore-extension callback is unset or rejected the extension.
  EXTENSION_NOT_SYNCED = 7,
  // A non-whitelisted source whose URL matched no whitelisted source's URL.
  NOT_MATCHED = 8,
  // The sanitized URL is empty.
  EMPTY_URL = 9,
  // Exclusive upper bound passed to the histogram macros; keep last.
  NUM_DROPPED_DATA_REASONS
};
107
108 void RecordDroppedSource(DroppedDataReason reason) {
109   UMA_HISTOGRAM_ENUMERATION(
110       "UKM.Sources.Dropped", static_cast<int>(reason),
111       static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS));
112 }
113
114 void RecordDroppedEntry(DroppedDataReason reason) {
115   UMA_HISTOGRAM_ENUMERATION(
116       "UKM.Entries.Dropped", static_cast<int>(reason),
117       static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS));
118 }
119
120 void StoreEntryProto(const mojom::UkmEntry& in, Entry* out) {
121   DCHECK(!out->has_source_id());
122   DCHECK(!out->has_event_hash());
123
124   out->set_source_id(in.source_id);
125   out->set_event_hash(in.event_hash);
126   for (const auto& metric : in.metrics) {
127     Entry::Metric* proto_metric = out->add_metrics();
128     proto_metric->set_metric_hash(metric.first);
129     proto_metric->set_value(metric.second);
130   }
131 }
132
133 GURL SanitizeURL(const GURL& url) {
134   GURL::Replacements remove_params;
135   remove_params.ClearUsername();
136   remove_params.ClearPassword();
137   // chrome:// and about: URLs params are never used for navigation, only to
138   // prepopulate data on the page, so don't include their params.
139   if (url.SchemeIs(url::kAboutScheme) || url.SchemeIs("chrome")) {
140     remove_params.ClearQuery();
141   }
142   if (url.SchemeIs(kExtensionScheme)) {
143     remove_params.ClearPath();
144     remove_params.ClearQuery();
145     remove_params.ClearRef();
146   }
147   return url.ReplaceComponents(remove_params);
148 }
149
150 void AppendWhitelistedUrls(
151     const std::map<SourceId, std::unique_ptr<UkmSource>>& sources,
152     std::unordered_set<std::string>* urls) {
153   for (const auto& kv : sources) {
154     if (IsWhitelistedSourceId(kv.first)) {
155       urls->insert(kv.second->url().spec());
156       // Some non-navigation sources only record origin as a URL.
157       // Add the origin from the navigation source to match those too.
158       urls->insert(kv.second->url().GetOrigin().spec());
159     }
160   }
161 }
162
163 bool HasUnknownMetrics(const ukm::builders::DecodeMap& decode_map,
164                        const mojom::UkmEntry& entry) {
165   const auto it = decode_map.find(entry.event_hash);
166   if (it == decode_map.end())
167     return true;
168   const auto& metric_map = it->second.metric_map;
169   for (const auto& metric : entry.metrics) {
170     if (metric_map.count(metric.first) == 0)
171       return true;
172   }
173   return false;
174 }
175
176 }  // namespace
177
// Constructs a recorder with recording turned off; EnableRecording() turns it
// on.
UkmRecorderImpl::UkmRecorderImpl() : recording_enabled_(false) {}
// Nothing beyond member destruction is required.
UkmRecorderImpl::~UkmRecorderImpl() = default;
180
181 // static
182 void UkmRecorderImpl::CreateFallbackSamplingTrial(
183     bool is_stable_channel,
184     base::FeatureList* feature_list) {
185   static const char kSampledGroup_Stable[] = "Sampled_NoSeed_Stable";
186   static const char kSampledGroup_Other[] = "Sampled_NoSeed_Other";
187   const char* sampled_group = kSampledGroup_Other;
188   int default_sampling = 1;  // Sampling is 1-in-N; this is N.
189
190   // Nothing is sampled out except for "stable" which omits almost everything
191   // in this configuration. This is done so that clients that fail to receive
192   // a configuration from the server do not bias aggregated results because
193   // of a relatively large number of records from them.
194   if (is_stable_channel) {
195     sampled_group = kSampledGroup_Stable;
196     default_sampling = 1000000;
197   }
198
199   scoped_refptr<base::FieldTrial> trial(
200       base::FieldTrialList::FactoryGetFieldTrial(
201           kUkmSamplingRateFeature.name, 100, sampled_group,
202           base::FieldTrialList::kNoExpirationYear, 1, 1,
203           base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr));
204
205   // Everybody (100%) should have a sampling configuration.
206   std::map<std::string, std::string> params = {
207       {"_default_sampling", base::IntToString(default_sampling)}};
208   variations::AssociateVariationParams(trial->trial_name(), sampled_group,
209                                        params);
210   trial->AppendGroup(sampled_group, 100);
211
212   // Setup the feature.
213   feature_list->RegisterFieldTrialOverride(
214       kUkmSamplingRateFeature.name, base::FeatureList::OVERRIDE_ENABLE_FEATURE,
215       trial.get());
216 }
217
// EventAggregate uses compiler-generated construction and destruction; they
// are defined out-of-line here.
UkmRecorderImpl::EventAggregate::EventAggregate() = default;
UkmRecorderImpl::EventAggregate::~EventAggregate() = default;
220
// PageSampling uses compiler-generated construction and destruction; they are
// defined out-of-line here.
UkmRecorderImpl::PageSampling::PageSampling() = default;
UkmRecorderImpl::PageSampling::~PageSampling() = default;
223
224 void UkmRecorderImpl::PageSampling::Set(uint64_t event_id, bool sampled_in) {
225   event_sampling_[event_id] = sampled_in;
226   modified_ = true;
227 }
228
229 bool UkmRecorderImpl::PageSampling::Find(uint64_t event_id,
230                                          bool* out_sampled_in) const {
231   auto found = event_sampling_.find(event_id);
232   if (found == event_sampling_.end())
233     return false;
234   *out_sampled_in = found->second;
235   return true;
236 }
237
// Recordings uses compiler-generated construction, move assignment and
// destruction, defined out-of-line here. Move assignment is what Reset()
// relies on.
UkmRecorderImpl::Recordings::Recordings() = default;
UkmRecorderImpl::Recordings& UkmRecorderImpl::Recordings::operator=(
    Recordings&&) = default;
UkmRecorderImpl::Recordings::~Recordings() = default;
242
243 void UkmRecorderImpl::Recordings::Reset() {
244   *this = Recordings();
245 }
246
247 void UkmRecorderImpl::Recordings::SourceCounts::Reset() {
248   *this = SourceCounts();
249 }
250
251 void UkmRecorderImpl::EnableRecording(bool extensions) {
252   DVLOG(1) << "UkmRecorderImpl::EnableRecording, extensions=" << extensions;
253   recording_enabled_ = true;
254   extensions_enabled_ = extensions;
255 }
256
257 void UkmRecorderImpl::DisableRecording() {
258   DVLOG(1) << "UkmRecorderImpl::DisableRecording";
259   if (recording_enabled_)
260     recording_is_continuous_ = false;
261   recording_enabled_ = false;
262   extensions_enabled_ = false;
263 }
264
// Test hook: clears |sampling_enabled_|, so AddEntry() no longer drops
// entries that were sampled out.
void UkmRecorderImpl::DisableSamplingForTesting() {
  sampling_enabled_ = false;
}
268
269 void UkmRecorderImpl::Purge() {
270   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
271   source_event_sampling_.clear();
272   recordings_.Reset();
273   recording_is_continuous_ = false;
274 }
275
// Installs the callback that ShouldRecordUrl() runs with an extension URL's
// host to decide whether that extension's URL may be recorded. Replaces any
// callback set earlier.
void UkmRecorderImpl::SetIsWebstoreExtensionCallback(
    const IsWebstoreExtensionCallback& callback) {
  is_webstore_extension_callback_ = callback;
}
280
// Serializes everything recorded so far into |report| and resets the
// in-memory state for the next reporting period. In order, this:
//  1. copies all entries into |report|, remembering which source ids they
//     reference;
//  2. serializes sources: whitelisted ones always, others only if their URL
//     matches the URL whitelist and an entry references them. Matching but
//     unreferenced sources are held back as "unsent";
//  3. serializes the per-event aggregates;
//  4. emits the serialization histograms and fills the report's source
//     counts;
//  5. clears the recordings, then re-adds up to GetMaxKeptSources() of the
//     most recently created unsent sources as carry-over;
//  6. drops per-source sampling decisions that were not modified since the
//     previous call.
void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  // Source ids referenced by at least one serialized entry.
  std::set<SourceId> ids_seen;
  for (const auto& entry : recordings_.entries) {
    Entry* proto_entry = report->add_entries();
    StoreEntryProto(*entry, proto_entry);
    ids_seen.insert(entry->source_id);
  }

  // Start from the URLs carried over by the previous call (emptying the
  // carry-over set in the process), then add the URLs and origins of the
  // currently recorded whitelisted sources.
  std::unordered_set<std::string> url_whitelist;
  recordings_.carryover_urls_whitelist.swap(url_whitelist);
  AppendWhitelistedUrls(recordings_.sources, &url_whitelist);

  std::vector<std::unique_ptr<UkmSource>> unsent_sources;
  int unmatched_sources = 0;
  std::unordered_map<ukm::SourceIdType, int> serialized_source_type_counts;
  for (auto& kv : recordings_.sources) {
    // If the source id is not whitelisted, don't send it unless it has
    // associated entries and the URL matches a URL of a whitelisted source.
    // Note: If ShouldRestrictToWhitelistedSourceIds() is true, this logic will
    // not be hit as the source would have already been filtered in
    // UpdateSourceURL().
    if (!IsWhitelistedSourceId(kv.first)) {
      // UkmSource should not keep initial_url for non-navigation source IDs.
      DCHECK_EQ(1u, kv.second->urls().size());
      if (!url_whitelist.count(kv.second->url().spec())) {
        RecordDroppedSource(DroppedDataReason::NOT_MATCHED);
        unmatched_sources++;
        continue;
      }
      if (!base::ContainsKey(ids_seen, kv.first)) {
        // Matches the whitelist but nothing references it yet: take ownership
        // out of the map (leaving a null value behind; the map is cleared
        // below) so it can be considered for carry-over.
        unsent_sources.push_back(std::move(kv.second));
        continue;
      }
    }
    Source* proto_source = report->add_sources();
    kv.second->PopulateProto(proto_source);
    if (!ShouldRecordInitialUrl())
      proto_source->clear_initial_url();

    serialized_source_type_counts[GetSourceIdType(kv.first)]++;
  }
  for (const auto& event_and_aggregate : recordings_.event_aggregations) {
    // Events that never carried a metric produce no aggregate.
    if (event_and_aggregate.second.metrics.empty())
      continue;
    const EventAggregate& event_aggregate = event_and_aggregate.second;
    Aggregate* proto_aggregate = report->add_aggregates();
    proto_aggregate->set_source_id(0);  // Across all sources.
    proto_aggregate->set_event_hash(event_and_aggregate.first);
    proto_aggregate->set_total_count(event_aggregate.total_count);
    proto_aggregate->set_dropped_due_to_limits(
        event_aggregate.dropped_due_to_limits);
    proto_aggregate->set_dropped_due_to_sampling(
        event_aggregate.dropped_due_to_sampling);
    proto_aggregate->set_dropped_due_to_whitelist(
        event_aggregate.dropped_due_to_whitelist);
    for (const auto& metric_and_aggregate : event_aggregate.metrics) {
      const MetricAggregate& aggregate = metric_and_aggregate.second;
      Aggregate::Metric* proto_metric = proto_aggregate->add_metrics();
      proto_metric->set_metric_hash(metric_and_aggregate.first);
      proto_metric->set_value_sum(aggregate.value_sum);
      proto_metric->set_value_square_sum(aggregate.value_square_sum);
      // Per-metric counters are only written when they differ from the
      // event-level counter of the same name.
      if (aggregate.total_count != event_aggregate.total_count) {
        proto_metric->set_total_count(aggregate.total_count);
      }
      if (aggregate.dropped_due_to_limits !=
          event_aggregate.dropped_due_to_limits) {
        proto_metric->set_dropped_due_to_limits(
            aggregate.dropped_due_to_limits);
      }
      if (aggregate.dropped_due_to_sampling !=
          event_aggregate.dropped_due_to_sampling) {
        proto_metric->set_dropped_due_to_sampling(
            aggregate.dropped_due_to_sampling);
      }
      if (aggregate.dropped_due_to_whitelist !=
          event_aggregate.dropped_due_to_whitelist) {
        proto_metric->set_dropped_due_to_whitelist(
            aggregate.dropped_due_to_whitelist);
      }
    }
  }
  // Total number of sources written to the report, over all source id types.
  int num_serialized_sources = 0;
  for (const auto& entry : serialized_source_type_counts) {
    num_serialized_sources += entry.second;
  }

  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2",
                            num_serialized_sources);
  UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCount2",
                              recordings_.entries.size());
  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount",
                            unsent_sources.size());
  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnmatchedSourcesCount",
                            unmatched_sources);

  // operator[] yields 0 for a source id type that had no serialized source.
  UMA_HISTOGRAM_COUNTS_1000(
      "UKM.Sources.SerializedCount2.Ukm",
      serialized_source_type_counts[ukm::SourceIdType::UKM]);
  UMA_HISTOGRAM_COUNTS_1000(
      "UKM.Sources.SerializedCount2.Navigation",
      serialized_source_type_counts[ukm::SourceIdType::NAVIGATION_ID]);
  UMA_HISTOGRAM_COUNTS_1000(
      "UKM.Sources.SerializedCount2.App",
      serialized_source_type_counts[ukm::SourceIdType::APP_ID]);

  Report::SourceCounts* source_counts_proto = report->mutable_source_counts();
  source_counts_proto->set_observed(recordings_.source_counts.observed);
  source_counts_proto->set_navigation_sources(
      recordings_.source_counts.navigation_sources);
  source_counts_proto->set_unmatched_sources(unmatched_sources);
  source_counts_proto->set_deferred_sources(unsent_sources.size());
  source_counts_proto->set_carryover_sources(
      recordings_.source_counts.carryover_sources);

  // Everything has been serialized (or moved into |unsent_sources|); start
  // the next period from an empty state.
  recordings_.sources.clear();
  recordings_.source_counts.Reset();
  recordings_.entries.clear();
  recordings_.event_aggregations.clear();

  // The report carries the current continuity flag; the flag is then set so
  // that later reports are continuous until DisableRecording() or Purge()
  // clears it.
  report->set_is_continuous(recording_is_continuous_);
  recording_is_continuous_ = true;

  // Keep at most |max_kept_sources|, prioritizing most-recent entries (by
  // creation time).
  const size_t max_kept_sources = GetMaxKeptSources();
  if (unsent_sources.size() > max_kept_sources) {
    // Partition so the |max_kept_sources| most recently created sources come
    // first, then drop the rest.
    std::nth_element(unsent_sources.begin(),
                     unsent_sources.begin() + max_kept_sources,
                     unsent_sources.end(),
                     [](const std::unique_ptr<ukm::UkmSource>& lhs,
                        const std::unique_ptr<ukm::UkmSource>& rhs) {
                       return lhs->creation_time() > rhs->creation_time();
                     });
    unsent_sources.resize(max_kept_sources);
  }

  for (auto& source : unsent_sources) {
    // We already matched these sources against the URL whitelist.
    // Re-whitelist them for the next report.
    recordings_.carryover_urls_whitelist.insert(source->url().spec());
    recordings_.sources.emplace(source->id(), std::move(source));
  }
  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount",
                            recordings_.sources.size());
  recordings_.source_counts.carryover_sources = recordings_.sources.size();

  // Check all the event-sampling values and clear those for any sources
  // not seen since the last data upload. This ensures that pages never
  // visited again don't continue to use memory remembering what events
  // were sampled-in the last time they were accessed. They can't simply be
  // cleared here because this call could come in the middle of a page
  // load.
  auto iter = source_event_sampling_.begin();
  auto next = iter;
  while (iter != source_event_sampling_.end()) {
    // Increment here (and copy later) because otherwise erasing |iter| would
    // break iteration.
    ++next;

    // If the PageSampling has been modified since the last upload of data,
    // clear that flag and continue. If it hasn't been modified, remove the
    // entire object.
    if (iter->second.modified())
      iter->second.clear_modified();
    else
      source_event_sampling_.erase(iter);

    iter = next;
  }
}
453
454 bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const {
455   return base::GetFieldTrialParamByFeatureAsBool(
456       kUkmFeature, "RestrictToWhitelistedSourceIds", false);
457 }
458
// Whether AddEntry() drops entries whose event hash is not in
// |whitelisted_entry_hashes_|. This implementation always restricts.
bool UkmRecorderImpl::ShouldRestrictToWhitelistedEntries() const {
  return true;
}
462
463 void UkmRecorderImpl::UpdateSourceURL(SourceId source_id,
464                                       const GURL& unsanitized_url) {
465   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
466
467   if (base::ContainsKey(recordings_.sources, source_id))
468     return;
469
470   const GURL sanitized_url = SanitizeURL(unsanitized_url);
471   if (!ShouldRecordUrl(source_id, sanitized_url))
472     return;
473
474   RecordSource(std::make_unique<UkmSource>(source_id, sanitized_url));
475 }
476
477 void UkmRecorderImpl::UpdateAppURL(SourceId source_id, const GURL& url) {
478   if (!extensions_enabled_) {
479     RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED);
480     return;
481   }
482   UpdateSourceURL(source_id, url);
483 }
484
485 void UkmRecorderImpl::RecordNavigation(
486     SourceId source_id,
487     const UkmSource::NavigationData& unsanitized_navigation_data) {
488   DCHECK(GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID);
489   DCHECK(!base::ContainsKey(recordings_.sources, source_id));
490   // TODO(csharrison): Consider changing this behavior so the Source isn't event
491   // recorded at all if the final URL in |unsanitized_navigation_data| should
492   // not be recorded.
493   std::vector<GURL> urls;
494   for (const GURL& url : unsanitized_navigation_data.urls) {
495     const GURL sanitized_url = SanitizeURL(url);
496     if (ShouldRecordUrl(source_id, sanitized_url))
497       urls.push_back(std::move(sanitized_url));
498   }
499
500   // None of the URLs passed the ShouldRecordUrl check, so do not create a new
501   // Source for them.
502   if (urls.empty())
503     return;
504
505   UkmSource::NavigationData sanitized_navigation_data =
506       unsanitized_navigation_data.CopyWithSanitizedUrls(urls);
507   RecordSource(
508       std::make_unique<UkmSource>(source_id, sanitized_navigation_data));
509 }
510
511 bool UkmRecorderImpl::ShouldRecordUrl(SourceId source_id,
512                                       const GURL& sanitized_url) const {
513   if (!recording_enabled_) {
514     RecordDroppedSource(DroppedDataReason::RECORDING_DISABLED);
515     return false;
516   }
517
518   if (recordings_.sources.size() >= GetMaxSources()) {
519     RecordDroppedSource(DroppedDataReason::MAX_HIT);
520     return false;
521   }
522
523   if (ShouldRestrictToWhitelistedSourceIds() &&
524       !IsWhitelistedSourceId(source_id)) {
525     RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED);
526     return false;
527   }
528
529   if (sanitized_url.is_empty()) {
530     RecordDroppedSource(DroppedDataReason::EMPTY_URL);
531     return false;
532   }
533
534   if (!HasSupportedScheme(sanitized_url)) {
535     RecordDroppedSource(DroppedDataReason::UNSUPPORTED_URL_SCHEME);
536     DVLOG(2) << "Dropped Unsupported UKM URL:" << source_id << ":"
537              << sanitized_url.spec();
538     return false;
539   }
540
541   // Extension URLs need to be specifically enabled and the extension synced.
542   if (sanitized_url.SchemeIs(kExtensionScheme)) {
543     DCHECK_EQ(sanitized_url.GetWithEmptyPath(), sanitized_url);
544     if (!extensions_enabled_) {
545       RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED);
546       return false;
547     }
548     if (!is_webstore_extension_callback_ ||
549         !is_webstore_extension_callback_.Run(sanitized_url.host_piece())) {
550       RecordDroppedSource(DroppedDataReason::EXTENSION_NOT_SYNCED);
551       return false;
552     }
553   }
554   return true;
555 }
556
557 void UkmRecorderImpl::RecordSource(std::unique_ptr<UkmSource> source) {
558   SourceId source_id = source->id();
559   if (GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID)
560     recordings_.source_counts.navigation_sources++;
561   recordings_.source_counts.observed++;
562   recordings_.sources.emplace(source_id, std::move(source));
563 }
564
565 void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
566   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
567
568   DCHECK(!HasUnknownMetrics(decode_map_, *entry));
569
570   if (!recording_enabled_) {
571     RecordDroppedEntry(DroppedDataReason::RECORDING_DISABLED);
572     return;
573   }
574
575   EventAggregate& event_aggregate =
576       recordings_.event_aggregations[entry->event_hash];
577   event_aggregate.total_count++;
578   for (const auto& metric : entry->metrics) {
579     MetricAggregate& aggregate = event_aggregate.metrics[metric.first];
580     double value = metric.second;
581     aggregate.total_count++;
582     aggregate.value_sum += value;
583     aggregate.value_square_sum += value * value;
584   }
585
586   if (ShouldRestrictToWhitelistedEntries() &&
587       !base::ContainsKey(whitelisted_entry_hashes_, entry->event_hash)) {
588     RecordDroppedEntry(DroppedDataReason::NOT_WHITELISTED);
589     event_aggregate.dropped_due_to_whitelist++;
590     for (auto& metric : entry->metrics)
591       event_aggregate.metrics[metric.first].dropped_due_to_whitelist++;
592     return;
593   }
594
595   if (default_sampling_rate_ == 0)
596     LoadExperimentSamplingInfo();
597
598   bool sampled_in = true;  // Overwritten by Find(...) if it returns True.
599   PageSampling* page_sampling = &source_event_sampling_[entry->source_id];
600   if (!page_sampling->Find(entry->event_hash, &sampled_in)) {
601     auto found = event_sampling_rates_.find(entry->event_hash);
602     int sampling_rate = (found != event_sampling_rates_.end())
603                             ? found->second
604                             : default_sampling_rate_;
605     sampled_in = IsSampledIn(sampling_rate);
606
607     // Remember the decision for this event for this page so all such events
608     // on this page are sampled-in or sampled-out together making it possible
609     // to correlate between events and within a page.
610     page_sampling->Set(entry->event_hash, sampled_in);
611   }
612
613   if (!sampled_in && sampling_enabled_) {
614     RecordDroppedEntry(DroppedDataReason::SAMPLED_OUT);
615     event_aggregate.dropped_due_to_sampling++;
616     for (auto& metric : entry->metrics)
617       event_aggregate.metrics[metric.first].dropped_due_to_sampling++;
618     return;
619   }
620
621   if (recordings_.entries.size() >= GetMaxEntries()) {
622     RecordDroppedEntry(DroppedDataReason::MAX_HIT);
623     event_aggregate.dropped_due_to_limits++;
624     for (auto& metric : entry->metrics)
625       event_aggregate.metrics[metric.first].dropped_due_to_limits++;
626     return;
627   }
628
629   recordings_.entries.push_back(std::move(entry));
630 }
631
632 void UkmRecorderImpl::LoadExperimentSamplingInfo() {
633   DCHECK_EQ(0, default_sampling_rate_);
634   std::map<std::string, std::string> params;
635
636   if (base::FeatureList::IsEnabled(kUkmSamplingRateFeature)) {
637     // Enabled may have various parameters to control sampling.
638     if (base::GetFieldTrialParamsByFeature(kUkmSamplingRateFeature, &params)) {
639       for (const auto& kv : params) {
640         const std::string& key = kv.first;
641         if (key.length() == 0)
642           continue;
643
644         // Keys starting with an underscore are global configuration.
645         if (key.at(0) == '_') {
646           if (key == "_default_sampling") {
647             int sampling;
648             if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
649               default_sampling_rate_ = sampling;
650           }
651           continue;
652         }
653
654         // Anything else is an event name.
655         int sampling;
656         if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
657           event_sampling_rates_[base::HashMetricName(key)] = sampling;
658       }
659     }
660   }
661
662   // Default rate must be >0 to indicate that load is complete.
663   if (default_sampling_rate_ == 0)
664     default_sampling_rate_ = 1;
665 }
666
667 bool UkmRecorderImpl::IsSampledIn(int sampling_rate) {
668   // A sampling rate of 0 is "never"; everything else is 1-in-N but skip
669   // the RandInt() call if N==1.
670   return sampling_rate > 0 &&
671          (sampling_rate == 1 || base::RandInt(1, sampling_rate) != 1);
672 }
673
674 void UkmRecorderImpl::StoreWhitelistedEntries() {
675   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
676   const auto entries =
677       base::SplitString(GetWhitelistEntries(), ",", base::TRIM_WHITESPACE,
678                         base::SPLIT_WANT_NONEMPTY);
679   for (const auto& entry_string : entries)
680     whitelisted_entry_hashes_.insert(base::HashMetricName(entry_string));
681   decode_map_ = ::ukm::builders::CreateDecodeMap();
682 }
683
684 }  // namespace ukm