src/tools/perf/metrics/statistics.py

   1 # Copyright 2013 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """A collection of statistical utility functions to be used by metrics."""
   6
   7 import bisect
   8 import math
   9
  10
  11 def Clamp(value, low=0.0, high=1.0):
  12   """Clamp a value between some low and high value."""
  13   return min(max(value, low), high)
  14
  15
  16 def NormalizeSamples(samples):
  17   """Sorts the samples, and map them linearly to the range [0,1].
  18
  19   They're mapped such that for the N samples, the first sample is 0.5/N and the
  20   last sample is (N-0.5)/N.
  21
  22   Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,
  23   twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case
  24   we don't want to distinguish between these two cases, as our original domain
  25   is not bounded (it is for Monte Carlo integration, where discrepancy was
  26   first used).
  27   """
  28   if not samples:
  29     return samples, 1.0
  30   samples = sorted(samples)
  31   low = min(samples)
  32   high = max(samples)
  33   new_low = 0.5 / len(samples)
  34   new_high = (len(samples)-0.5) / len(samples)
  35   if high-low == 0.0:
  36     return samples, 1.0
  37   scale = (new_high - new_low) / (high - low)
  38   for i in xrange(0, len(samples)):
  39     samples[i] = float(samples[i] - low) * scale + new_low
  40   return samples, scale
  41
  42
  43 def Discrepancy(samples, interval_multiplier=10000):
  44   """Computes the discrepancy of a set of 1D samples from the interval [0,1].
  45
  46   The samples must be sorted.
  47
  48   http://en.wikipedia.org/wiki/Low-discrepancy_sequence
  49   http://mathworld.wolfram.com/Discrepancy.html
  50   """
  51   if not samples:
  52     return 1.0
  53
  54   max_local_discrepancy = 0
  55   locations = []
  56   # For each location, stores the number of samples less than that location.
  57   left = []
  58   # For each location, stores the number of samples less than or equal to that
  59   # location.
  60   right = []
  61
  62   interval_count = len(samples) * interval_multiplier
  63   # Compute number of locations the will roughly result in the requested number
  64   # of intervals.
  65   location_count = int(math.ceil(math.sqrt(interval_count*2)))
  66   inv_sample_count = 1.0 / len(samples)
  67
  68   # Generate list of equally spaced locations.
  69   for i in xrange(0, location_count):
  70     location = float(i) / (location_count-1)
  71     locations.append(location)
  72     left.append(bisect.bisect_left(samples, location))
  73     right.append(bisect.bisect_right(samples, location))
  74
  75   # Iterate over the intervals defined by any pair of locations.
  76   for i in xrange(0, len(locations)):
  77     for j in xrange(i, len(locations)):
  78       # Compute length of interval and number of samples in the interval.
  79       length = locations[j] - locations[i]
  80       count = right[j] - left[i]
  81
  82       # Compute local discrepancy and update max_local_discrepancy.
  83       local_discrepancy = abs(float(count)*inv_sample_count - length)
  84       max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)
  85
  86   return max_local_discrepancy
  87
  88
  89 def FrameDiscrepancy(frame_timestamps, absolute=True,
  90                      interval_multiplier=10000):
  91   """A discrepancy based metric for measuring jank.
  92
  93   FrameDiscrepancy quantifies the largest area of jank observed in a series
  94   of timestamps.  Note that this is different form metrics based on the
  95   max_frame_time. For example, the time stamp series A = [0,1,2,3,5,6] and
  96   B = [0,1,2,3,5,7] have the same max_frame_time = 2, but
  97   Discrepancy(B) > Discrepancy(A).
  98
  99   Two variants of discrepancy can be computed:
 100
 101   Relative discrepancy is following the original definition of
 102   discrepancy. It characterized the largest area of jank, relative to the
 103   duration of the entire time stamp series.  We normalize the raw results,
 104   because the best case discrepancy for a set of N samples is 1/N (for
 105   equally spaced samples), and we want our metric to report 0.0 in that
 106   case.
 107
 108   Absolute discrepancy also characterizes the largest area of jank, but its
 109   value wouldn't change (except for imprecisions due to a low
 110   interval_multiplier) if additional 'good' frames were added to an
 111   exisiting list of time stamps.  Its range is [0,inf] and the unit is
 112   milliseconds.
 113
 114   The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
 115   absolute discrepancy, but D has lower relative discrepancy than C.
 116   """
 117   if not frame_timestamps:
 118     return 1.0
 119   samples, sample_scale = NormalizeSamples(frame_timestamps)
 120   discrepancy = Discrepancy(samples, interval_multiplier)
 121   inv_sample_count = 1.0 / len(samples)
 122   if absolute:
 123     # Compute absolute discrepancy
 124     discrepancy /= sample_scale
 125   else:
 126     # Compute relative discrepancy
 127     discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))
 128   return discrepancy
 129
 130
 131 def ArithmeticMean(numerator, denominator):
 132   """Calculates arithmetic mean.
 133
 134   Both numerator and denominator can be given as either individual
 135   values or lists of values which will be summed.
 136
 137   Args:
 138     numerator: A quantity that represents a sum total value.
 139     denominator: A quantity that represents a count of the number of things.
 140
 141   Returns:
 142     The arithmetic mean value, or 0 if the denominator value was 0.
 143   """
 144   numerator_total = Total(numerator)
 145   denominator_total = Total(denominator)
 146   return DivideIfPossibleOrZero(numerator_total, denominator_total)
 147
 148
 149 def Total(data):
 150   """Returns the float value of a number or the sum of a list."""
 151   if type(data) == float:
 152     total = data
 153   elif type(data) == int:
 154     total = float(data)
 155   elif type(data) == list:
 156     total = float(sum(data))
 157   else:
 158     raise TypeError
 159   return total
 160
 161
 162 def DivideIfPossibleOrZero(numerator, denominator):
 163   """Returns the quotient, or zero if the denominator is zero."""
 164   if not denominator:
 165     return 0.0
 166   else:
 167     return numerator / denominator
 168
 169
 170 def GeneralizedMean(values, exponent):
 171   """See http://en.wikipedia.org/wiki/Generalized_mean"""
 172   if not values:
 173     return 0.0
 174   sum_of_powers = 0.0
 175   for v in values:
 176     sum_of_powers += v ** exponent
 177   return (sum_of_powers / len(values)) ** (1.0/exponent)
 178
 179
 180 def Median(values):
 181   """Gets the median of a list of values."""
 182   return Percentile(values, 50)
 183
 184
 185 def Percentile(values, percentile):
 186   """Calculates the value below which a given percentage of values fall.
 187
 188   For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
 189   percentile for this set of values. When the percentage doesn't exactly
 190   match a rank in the list of values, the percentile is computed using linear
 191   interpolation between closest ranks.
 192
 193   Args:
 194     values: A list of numerical values.
 195     percentile: A number between 0 and 100.
 196
 197   Returns:
 198     The Nth percentile for the list of values, where N is the given percentage.
 199   """
 200   if not values:
 201     return 0.0
 202   sorted_values = sorted(values)
 203   n = len(values)
 204   percentile /= 100.0
 205   if percentile <= 0.5 / n:
 206     return sorted_values[0]
 207   elif percentile >= (n - 0.5) / n:
 208     return sorted_values[-1]
 209   else:
 210     floor_index = int(math.floor(n * percentile -  0.5))
 211     floor_value = sorted_values[floor_index]
 212     ceil_value = sorted_values[floor_index+1]
 213     alpha = n * percentile - 0.5 - floor_index
 214     return floor_value + alpha * (ceil_value - floor_value)
 215