From fa1eaaa843367b5b732ed923dc5522e258e6b27a Mon Sep 17 00:00:00 2001
From: kelvinly <kelvinly@google.com>
Date: Thu, 12 Jun 2014 11:27:40 -0700
Subject: [PATCH] Added in framework to get more bench data

BUG=skia:
NOTRY=true
R=jcgregorio@google.com, bensong@google.com

Author: kelvinly@google.com

Review URL: https://codereview.chromium.org/331683003
---
 bench/gen_bench_expectations.py | 92 +++++++++++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 4 deletions(-)

diff --git a/bench/gen_bench_expectations.py b/bench/gen_bench_expectations.py
index 6d44b6cda2..4edc38c09d 100644
--- a/bench/gen_bench_expectations.py
+++ b/bench/gen_bench_expectations.py
@@ -7,9 +7,11 @@
 
 import argparse
 import bench_util
+import json
 import os
 import re
 import sys
+import urllib2
 
 # Parameters for calculating bench ranges.
 RANGE_RATIO_UPPER = 1.5  # Ratio of range for upper bounds.
@@ -36,12 +38,17 @@ CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
 ENTRIES_TO_EXCLUDE = [
                      ]
 
+_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'
 
-def compute_ranges(benches):
+def compute_ranges(benches, more_benches=None):
   """Given a list of bench numbers, calculate the alert range.
 
   Args:
     benches: a list of float bench values.
+    more_benches: a list of (commits_back, bench_values) tuples with
+      additional data. commits_back is the number of commits before the
+      current one at which that set of values was measured, and
+      bench_values is a list of bench results.
 
   Returns:
     a list of float [lower_bound, upper_bound].
@@ -55,7 +62,7 @@ def compute_ranges(benches):
           maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]
 
 
-def create_expectations_dict(revision_data_points, builder):
+def create_expectations_dict(revision_data_points, builder, extra_data=None):
   """Convert list of bench data points into a dictionary of expectations data.
 
   Args:
@@ -81,13 +88,59 @@ def create_expectations_dict(revision_data_points, builder):
     if to_skip:
       continue
     key = (point.config, point.bench)
+
+    extras = []
+    for idx, dataset in extra_data:
+      for data in dataset:
+        if (data.bench == point.bench and data.config == point.config and
+              data.time_type == point.time_type and data.per_iter_time):
+          extras.append((idx, data.per_iter_time))
+
     if key in bench_dict:
       raise Exception('Duplicate bench entry: ' + str(key))
-    bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time)
+    bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)
 
   return bench_dict
 
 
+def get_parent_commits(start_hash, num_back):
+  """Returns the commit hash of every entry in the log of start_hash."""
+  # int(): argparse passes --back_track as a str, which %d would reject.
+  body = urllib2.urlopen(
+      'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
+      (start_hash, int(num_back))).read()
+  # The reply starts with a non-JSON guard; drop it only when it is present.
+  if body.startswith(")]}'"):
+    body = body[4:]
+  return [revision['commit'] for revision in json.loads(body)['log']]
+
+
+def get_file_suffixes(commit_hash, directory):
+  """Gets the part after 'bench_<commit_hash>_data_' of names in directory."""
+  entries = os.listdir(directory)
+  marker = ''.join(('bench_', commit_hash, '_data_'))
+  matches = [entry for entry in entries if entry.startswith(marker)]
+  return [entry[len(marker):] for entry in matches]
+
+
+def download_bench_data(builder, commit_hash, suffixes, directory):
+  """Downloads bench files not yet in directory; returns how many it saved."""
+  # Files already present are skipped and therefore not part of the count.
+  cur_files = set(os.listdir(directory))
+  names = ['bench_'+commit_hash+'_data_'+suffix for suffix in suffixes]
+  count = 0
+  for file_name in [name for name in names if name not in cur_files]:
+    try:
+      src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
+      with open(os.path.join(directory, file_name), 'w') as dest:
+        dest.writelines(src)
+      src.close()  # Release the HTTP response; it used to be left open.
+      count += 1
+    except urllib2.HTTPError:
+      pass  # Best effort: a file the server cannot serve is not counted.
+  return count
+
+
 def main():
     """Reads bench data points, then calculate and export expectations.
     """
@@ -107,6 +160,13 @@ def main():
     parser.add_argument(
         '-r', '--git_revision', required=True,
         help='the git hash to indicate the revision of input data to use.')
+    parser.add_argument(
+        '-t', '--back_track', required=False, default=10,
+        help='the number of commit hashes backwards to look to include' +
+             'in the calculations.')
+    parser.add_argument(
+        '-m', '--max_commits', required=False, default=1,
+        help='the number of commit hashes to include in the calculations.')
     args = parser.parse_args()
 
     builder = args.builder
@@ -114,7 +174,31 @@ def main():
     data_points = bench_util.parse_skp_bench_data(
         args.input_dir, args.git_revision, args.representation_alg)
 
-    expectations_dict = create_expectations_dict(data_points, builder)
+    parent_commits = get_parent_commits(args.git_revision, args.back_track)
+    print "Using commits: {}".format(parent_commits)
+    suffixes = get_file_suffixes(args.git_revision, args.input_dir)
+    print "Using suffixes: {}".format(suffixes)
+
+    # TODO(kelvinly): Find a better approach than directly copying from
+    # the GS server?
+    downloaded_commits = []
+    for idx, commit in enumerate(parent_commits):
+      num_downloaded = download_bench_data(
+          builder, commit, suffixes, args.input_dir)
+      if num_downloaded > 0:
+        downloaded_commits.append((num_downloaded, idx, commit))
+
+    if len(downloaded_commits) < args.max_commits:
+      print ('Less than desired number of commits found. Please increase'
+            '--back_track in later runs')
+    trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
+    extra_data = []
+    for _, idx, commit in trunc_commits:
+      extra_data.append((idx, bench_util.parse_skp_bench_data(
+          args.input_dir, commit, args.representation_alg)))
+
+    expectations_dict = create_expectations_dict(data_points, builder,
+                                                 extra_data)
 
     out_lines = []
     keys = expectations_dict.keys()
-- 
2.34.1