1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """A collection of statistical utility functions to be used by metrics."""
def Clamp(value, low=0.0, high=1.0):
  """Restricts a value to the closed interval [low, high].

  Args:
    value: The number to clamp.
    low: The lower bound (defaults to 0.0).
    high: The upper bound (defaults to 1.0).

  Returns:
    value limited to be no less than low and no greater than high.
  """
  bounded_below = max(value, low)
  return min(bounded_below, high)
def NormalizeSamples(samples):
  """Sorts the samples, and maps them linearly to the range [0,1].

  They're mapped such that for the N samples, the first sample is 0.5/N and
  the last sample is (N-0.5)/N.

  Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is
  2/N, twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In
  our case we don't want to distinguish between these two cases, as our
  original domain is not bounded (it is for Monte Carlo integration, where
  discrepancy was first used).

  Args:
    samples: A list of numeric values.

  Returns:
    A tuple (normalized_samples, scale) where scale is the slope of the
    linear map applied. Empty or constant-valued inputs are returned
    (sorted) with a scale of 1.0.
  """
  if not samples:
    return samples, 1.0
  samples = sorted(samples)
  low = min(samples)
  high = max(samples)
  new_low = 0.5 / len(samples)
  new_high = (len(samples) - 0.5) / len(samples)
  if high - low == 0.0:
    # All samples are identical: no linear map to [new_low, new_high]
    # exists, so leave the values untouched.
    return samples, 1.0
  scale = (new_high - new_low) / (high - low)
  for i in range(len(samples)):
    samples[i] = float(samples[i] - low) * scale + new_low
  return samples, scale
def Discrepancy(samples, interval_multiplier=10000):
  """Computes the discrepancy of a set of 1D samples from the interval [0,1].

  The samples must be sorted.

  http://en.wikipedia.org/wiki/Low-discrepancy_sequence
  http://mathworld.wolfram.com/Discrepancy.html

  Args:
    samples: A sorted list of values in [0,1].
    interval_multiplier: Controls the resolution of the location grid;
      larger values give a more precise (and more expensive) answer.

  Returns:
    The maximum, over the examined sub-intervals, of
    |fraction_of_samples_in_interval - interval_length|; 1.0 for an empty
    sample set.
  """
  if not samples:
    return 1.0

  max_local_discrepancy = 0
  locations = []
  # For each location, stores the number of samples less than that location.
  left = []
  # For each location, stores the number of samples less than or equal to
  # that location.
  right = []

  interval_count = len(samples) * interval_multiplier
  # Compute a number of locations that will roughly result in the requested
  # number of intervals (location_count choose 2 ~= interval_count).
  location_count = int(math.ceil(math.sqrt(interval_count * 2)))
  inv_sample_count = 1.0 / len(samples)

  # Generate a list of equally spaced locations and precompute, for each,
  # how many samples fall strictly below / at-or-below it.
  for i in range(location_count):
    location = float(i) / (location_count - 1)
    locations.append(location)
    left.append(bisect.bisect_left(samples, location))
    right.append(bisect.bisect_right(samples, location))

  # Iterate over the intervals defined by any pair of locations.
  for i in range(len(locations)):
    for j in range(i, len(locations)):
      # Compute the length of the interval and the number of samples in it.
      length = locations[j] - locations[i]
      count = right[j] - left[i]

      # Compute the local discrepancy and update max_local_discrepancy.
      local_discrepancy = abs(float(count) * inv_sample_count - length)
      max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)

  return max_local_discrepancy
def FrameDiscrepancy(frame_timestamps, absolute=True,
                     interval_multiplier=10000):
  """A discrepancy based metric for measuring jank.

  FrameDiscrepancy quantifies the largest area of jank observed in a series
  of timestamps. Note that this is different from metrics based on the
  max_frame_time. For example, the time stamp series A = [0,1,2,3,5,6] and
  B = [0,1,2,3,5,7] have the same max_frame_time = 2, but
  Discrepancy(B) > Discrepancy(A).

  Two variants of discrepancy can be computed:

  Relative discrepancy is following the original definition of
  discrepancy. It characterized the largest area of jank, relative to the
  duration of the entire time stamp series. We normalize the raw results,
  because the best case discrepancy for a set of N samples is 1/N (for
  equally spaced samples), and we want our metric to report 0.0 in that
  case. Its range is [0,1].

  Absolute discrepancy also characterizes the largest area of jank, but its
  value wouldn't change (except for imprecisions due to a low
  interval_multiplier) if additional 'good' frames were added to an
  existing list of time stamps. Its range is [0,inf] and the unit is the
  same as the unit of the input time stamps.

  The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
  absolute discrepancy, but D has lower relative discrepancy than C.

  Args:
    frame_timestamps: A list of frame time stamps.
    absolute: If True, compute absolute discrepancy; otherwise relative.
    interval_multiplier: Resolution, passed through to Discrepancy().

  Returns:
    The discrepancy value; 1.0 for an empty time stamp list.
  """
  if not frame_timestamps:
    return 1.0
  samples, sample_scale = NormalizeSamples(frame_timestamps)
  discrepancy = Discrepancy(samples, interval_multiplier)
  inv_sample_count = 1.0 / len(samples)
  if absolute:
    # Compute absolute discrepancy: undo the normalization scale so the
    # result is expressed in the input's time units.
    discrepancy /= sample_scale
  else:
    # Compute relative discrepancy: map the best-case value (1/N) to 0.0
    # and clamp into [0,1].
    discrepancy = Clamp((discrepancy - inv_sample_count) /
                        (1.0 - inv_sample_count))
  return discrepancy
def ArithmeticMean(numerator, denominator):
  """Calculates arithmetic mean.

  Both numerator and denominator can be given as either individual
  values or lists of values which will be summed.

  Args:
    numerator: A quantity that represents a sum total value.
    denominator: A quantity that represents a count of the number of things.

  Returns:
    The arithmetic mean value, or 0 if the denominator value was 0.
  """
  numerator_total = Total(numerator)
  denominator_total = Total(denominator)
  return DivideIfPossibleOrZero(numerator_total, denominator_total)
def Total(data):
  """Returns the float value of a number or the sum of a list.

  Args:
    data: A float, an int, or a list of numbers.

  Returns:
    The value as a float; lists are summed first.

  Raises:
    TypeError: If data is of any other type.
  """
  if isinstance(data, float):
    total = data
  elif isinstance(data, int):
    total = float(data)
  elif isinstance(data, list):
    total = float(sum(data))
  else:
    raise TypeError('Expected a float, int or list, got %s' % type(data))
  return total
def DivideIfPossibleOrZero(numerator, denominator):
  """Returns the quotient, or zero if the denominator is zero.

  Args:
    numerator: The dividend.
    denominator: The divisor; a zero (or otherwise falsy) value yields 0.

  Returns:
    numerator / denominator, or 0 when the denominator is zero.
  """
  if not denominator:
    # Guard promised by the docstring: never raise ZeroDivisionError.
    return 0
  return numerator / denominator
170 def GeneralizedMean(values, exponent):
171 """See http://en.wikipedia.org/wiki/Generalized_mean"""
176 sum_of_powers += v ** exponent
177 return (sum_of_powers / len(values)) ** (1.0/exponent)
def Median(values):
  """Gets the median of a list of values.

  Args:
    values: A list of numerical values.

  Returns:
    The 50th percentile of the values (linear interpolation between the two
    middle elements for even-length lists).
  """
  return Percentile(values, 50)
def Percentile(values, percentile):
  """Calculates the value below which a given percentage of values fall.

  For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
  percentile for this set of values. When the percentage doesn't exactly
  match a rank in the list of values, the percentile is computed using linear
  interpolation between closest ranks.

  Args:
    values: A list of numerical values.
    percentile: A number between 0 and 100.

  Returns:
    The Nth percentile for the list of values, where N is the given
    percentage; 0 for an empty list.
  """
  if not values:
    return 0
  sorted_values = sorted(values)
  n = len(sorted_values)
  # Convert from a percentage (0-100) to a fraction (0-1).
  percentile = percentile / 100.0
  if percentile <= 0.5 / n:
    return sorted_values[0]
  elif percentile >= (n - 0.5) / n:
    return sorted_values[-1]
  else:
    # Linear interpolation between the two nearest ranks.
    floor_index = int(math.floor(n * percentile - 0.5))
    floor_value = sorted_values[floor_index]
    ceil_value = sorted_values[floor_index + 1]
    alpha = n * percentile - 0.5 - floor_index
    return floor_value + alpha * (ceil_value - floor_value)