2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Script to parse perf data from Chrome Endure test executions, to be graphed.
8 This script connects via HTTP to a buildbot master in order to scrape and parse
9 perf data from Chrome Endure tests that have been run. The perf data is then
10 stored in local text files to be graphed by the Chrome Endure graphing code.
12 It is assumed that any Chrome Endure tests that show up on the waterfall have
13 names that are of the following form:
15 "endure_<webapp_name>-<test_name>"
This script accepts either a URL or a local path as the buildbot location,
and switches its behavior accordingly.
20 When a URL is given, it gets buildbot logs from the buildbot builders URL
21 e.g. http://build.chromium.org/p/chromium.endure/builders/.
23 When a local path is given, it gets buildbot logs from buildbot's internal
24 files in the directory e.g. /home/chrome-bot/buildbot.
# Build slaves (builders) on the chromium.endure waterfall whose results this
# script scrapes.
# NOTE(review): this list appears truncated in this copy of the file (the
# closing bracket and, likely, the non-(dbg) builder names were elided);
# confirm the full membership against upstream.
CHROME_ENDURE_SLAVE_NAMES = [
    'Linux QA Perf (dbg)(0)',
    'Linux QA Perf (dbg)(1)',
    'Linux QA Perf (dbg)(2)',
    'Linux QA Perf (dbg)(3)',
    'Linux QA Perf (dbg)(4)',
]

# Base URL of the Chrome Endure waterfall's builder pages.
BUILDER_URL_BASE = 'http://build.chromium.org/p/chromium.endure/builders/'
# Records, per slave, the last build number this script has processed.
LAST_BUILD_NUM_PROCESSED_FILE = os.path.join(os.path.dirname(__file__),
                                             '_parser_last_processed.txt')
# Directory into which graph data files are written (served over HTTP).
LOCAL_GRAPH_DIR = '/home/%s/www/chrome_endure_clean' % getpass.getuser()
# Translation table mapping ' ', '(' and ')' to '_', used to mangle slave
# names into file-system-safe directory names (Python 2 string.maketrans).
MANGLE_TRANSLATION = string.maketrans(' ()', '___')
def SetupBaseGraphDirIfNeeded(webapp_name, test_name, dest_dir):
  """Sets up the directory containing results for a particular test, if needed.

  Args:
    webapp_name: The string name of the webapp associated with the given test.
    test_name: The string name of the test.
    dest_dir: The name of the destination directory that needs to be set up.
  """
  if not os.path.exists(dest_dir):
    os.mkdir(dest_dir)  # Test name directory.
    os.chmod(dest_dir, 0o755)

  # Create the per-test config file consumed by the graphing page.
  config_file = os.path.join(dest_dir, 'config.js')
  if not os.path.exists(config_file):
    with open(config_file, 'w') as f:
      f.write('var Config = {\n')
      f.write('buildslave: "Chrome Endure Bots",\n')
      f.write('title: "Chrome Endure %s Test: %s",\n' % (webapp_name.upper(),
                                                         test_name))
      # NOTE(review): the closing line of the config object was elided in
      # this copy of the file; '};' reconstructed — confirm against upstream.
      f.write('};\n')
    os.chmod(config_file, 0o755)

  # Set up symbolic links to the real graphing files.  Use lexists(), not
  # exists(): exists() follows the link, so a link whose target is not yet
  # present would look absent and a re-run would crash trying to recreate it.
  link_file = os.path.join(dest_dir, 'index.html')
  if not os.path.lexists(link_file):
    os.symlink('../../endure_plotter.html', link_file)
  link_file = os.path.join(dest_dir, 'endure_plotter.js')
  if not os.path.lexists(link_file):
    os.symlink('../../endure_plotter.js', link_file)
  link_file = os.path.join(dest_dir, 'js')
  if not os.path.lexists(link_file):
    os.symlink('../../js', link_file)
def WriteToDataFile(new_line, existing_lines, revision, data_file):
  """Writes a new entry to an existing perf data file to be graphed.

  If there's an existing line with the same revision number, overwrite its data
  with the new line.  Else, prepend the info for the new revision.

  Args:
    new_line: A dictionary representing perf information for the new entry.
    existing_lines: A list of string lines from the existing perf data file.
    revision: The string revision number associated with the new perf entry.
    data_file: The string name of the perf data file to which to write.
  """
  overwritten = False
  for i, line in enumerate(existing_lines):
    line_dict = simplejson.loads(line)
    if line_dict['rev'] == revision:
      # Same revision already recorded: replace its data in place.
      existing_lines[i] = simplejson.dumps(new_line)
      overwritten = True
      break
    elif int(line_dict['rev']) < int(revision):
      # Lines are ordered newest-revision-first; once we pass the insertion
      # point there is nothing to overwrite.
      break
  if not overwritten:
    # NOTE(review): a not-yet-seen revision is always prepended at the front,
    # even if it is older than existing entries; preserved as-is since the
    # grapher appears to expect newest-first data.
    existing_lines.insert(0, simplejson.dumps(new_line))

  with open(data_file, 'w') as f:
    f.write('\n'.join(existing_lines))
  os.chmod(data_file, 0o755)
def OutputPerfData(revision, graph_name, values, units, units_x, dest_dir,
                   is_stacked=False, stack_order=None):
  """Outputs perf data to a local text file to be graphed.

  Args:
    revision: The string revision number associated with the perf data.
    graph_name: The string name of the graph on which to plot the data.
    values: A dict which maps a description to a value.  A value is either a
        single data value to be graphed, or a list of 2-tuples
        representing (x, y) points to be graphed for long-running tests.
    units: The string description for the y-axis units on the graph.
    units_x: The string description for the x-axis units on the graph.  Should
        be set to None if the results are not for long-running graphs.
    dest_dir: The name of the destination directory to which to write.
    is_stacked: True to draw a "stacked" graph.  First-come values are
        stacked at bottom by default.
    stack_order: A list that contains key strings in the order to stack values
        in the graph.  Defaults to an empty list.
  """
  # A mutable default argument ([]) would be shared across calls and could be
  # silently mutated; normalize a None default instead.
  if stack_order is None:
    stack_order = []

  # Update graphs.dat, which contains metadata associated with each graph.
  existing_graphs = []
  graphs_file = os.path.join(dest_dir, 'graphs.dat')
  if os.path.exists(graphs_file):
    with open(graphs_file, 'r') as f:
      existing_graphs = simplejson.loads(f.read())

  is_new_graph = True
  for graph in existing_graphs:
    if graph['name'] == graph_name:
      is_new_graph = False
      break
  if is_new_graph:
    # NOTE(review): part of this metadata dict was elided in this copy of the
    # file; 'name'/'units' are implied by the surrounding code — confirm any
    # remaining keys against upstream.
    new_graph = {
        'name': graph_name,
        'units': units,
    }
    if units_x:
      new_graph['units_x'] = units_x
    existing_graphs.append(new_graph)
    existing_graphs = sorted(existing_graphs, key=lambda x: x['name'])
    with open(graphs_file, 'w') as f:
      f.write(simplejson.dumps(existing_graphs, indent=2))
    os.chmod(graphs_file, 0o755)

  # Update the summary data file, containing the actual data to be graphed.
  data_file_name = graph_name + '-summary.dat'
  existing_lines = []
  data_file = os.path.join(dest_dir, data_file_name)
  if os.path.exists(data_file):
    with open(data_file, 'r') as f:
      existing_lines = f.readlines()
    existing_lines = [line.strip() for line in existing_lines]

  new_traces = {}
  for description in values:
    value = values[description]
    if units_x:
      # Long-running test: |value| is a list of (x, y) points.
      points = []
      for point in value:
        points.append([str(point[0]), str(point[1])])
      new_traces[description] = points
    else:
      # Short-running test: single value with zero variance.
      new_traces[description] = [str(value), str(0.0)]

  new_line = {
      'traces': new_traces,
      'rev': revision,
  }
  if is_stacked:
    new_line['stack'] = True
    new_line['stack_order'] = stack_order

  WriteToDataFile(new_line, existing_lines, revision, data_file)
def OutputEventData(revision, event_dict, dest_dir):
  """Outputs event data to a local text file to be graphed.

  Args:
    revision: The string revision number associated with the event data.
    event_dict: A dict which maps a description to an array of tuples
        representing event data to be graphed.
    dest_dir: The name of the destination directory to which to write.
  """
  data_file_name = '_EVENT_-summary.dat'

  existing_lines = []
  data_file = os.path.join(dest_dir, data_file_name)
  if os.path.exists(data_file):
    with open(data_file, 'r') as f:
      existing_lines = f.readlines()
    existing_lines = [line.strip() for line in existing_lines]

  new_events = {}
  for description in event_dict:
    event_list = event_dict[description]
    value_list = []
    for event_time, event_data in event_list:
      value_list.append([str(event_time), event_data])
    new_events[description] = value_list

  # NOTE(review): the exact key layout of |new_line| was elided in this copy
  # of the file; 'rev' is required by WriteToDataFile and 'events' mirrors
  # 'traces' in OutputPerfData — confirm against upstream.
  new_line = {
      'rev': revision,
      'events': new_events,
  }

  WriteToDataFile(new_line, existing_lines, revision, data_file)
def UpdatePerfDataFromFetchedContent(
    revision, content, webapp_name, test_name, graph_dir, only_dmp=False):
  """Updates perf data from fetched stdio data.

  Args:
    revision: The string revision number associated with the new perf entry.
    content: Fetched stdio data.
    webapp_name: A name of the webapp.
    test_name: A name of the test.
    graph_dir: A path to the graph directory.
    only_dmp: True if only Deep Memory Profiler results should be used.
  """
  perf_data_raw = []

  def AppendRawPerfData(graph_name, description, value, units, units_x,
                        webapp_name, test_name, is_stacked=False):
    """Accumulates one parsed RESULT line into |perf_data_raw|."""
    perf_data_raw.append({
        'graph_name': graph_name,
        'description': description,
        'value': value,
        'units': units,
        'units_x': units_x,
        'webapp_name': webapp_name,
        'test_name': test_name,
        'stack': is_stacked,
    })

  # First scan for short-running perf test results.
  for match in re.findall(
      r'RESULT ([^:]+): ([^=]+)= ([-\d\.]+) (\S+)', content):
    if (not only_dmp) or match[0].endswith('-DMP'):
      try:
        # SECURITY NOTE: eval() executes whatever expression appears in the
        # scraped log.  Kept to preserve upstream parsing behavior; the input
        # is assumed to come from a trusted buildbot only.
        match2 = eval(match[2])
      except SyntaxError:
        continue  # Skip a malformed value rather than aborting the whole log.
      # NOTE(review): upstream passes |webapp_name| in the test-name slot
      # here; preserved as-is, but verify this is intentional.
      AppendRawPerfData(match[0], match[1], match2, match[3], None,
                        webapp_name, webapp_name)

  # Next scan for long-running perf test results.
  for match in re.findall(
      r'RESULT ([^:]+): ([^=]+)= (\[[^\]]+\]) (\S+) (\S+)', content):
    if (not only_dmp) or match[0].endswith('-DMP'):
      try:
        match2 = eval(match[2])  # List of (x, y) points; see note above.
      except SyntaxError:
        continue
      # TODO(dmikurube): Change the condition to use stacked graph when we
      # determine how to specify it.
      AppendRawPerfData(match[0], match[1], match2, match[3], match[4],
                        webapp_name, test_name, match[0].endswith('-DMP'))

  # Next scan for events in the test results.
  for match in re.findall(
      r'RESULT _EVENT_: ([^=]+)= (\[[^\]]+\])', content):
    try:
      match1 = eval(match[1])  # List of [time, data] pairs; see note above.
    except SyntaxError:
      continue
    AppendRawPerfData('_EVENT_', match[0], match1, None, None,
                      webapp_name, test_name)

  # For each graph_name/description pair that refers to a long-running test
  # result or an event, concatenate all the results together (assume results
  # in the input file are in the correct order).  For short-running test
  # results, keep just one if more than one is specified.
  perf_data = {}  # Maps a graph-line key to a perf data dictionary.
  for data in perf_data_raw:
    key_graph = data['graph_name']
    key_description = data['description']
    if not key_graph in perf_data:
      perf_data[key_graph] = {
          'graph_name': data['graph_name'],
          'value': {},
          'units': data['units'],
          'units_x': data['units_x'],
          'webapp_name': data['webapp_name'],
          'test_name': data['test_name'],
      }
    # Record stacking info; the stack order lists descriptions in first-seen
    # order, which is the order they will be stacked bottom-up.
    perf_data[key_graph]['stack'] = data['stack']
    if 'stack_order' not in perf_data[key_graph]:
      perf_data[key_graph]['stack_order'] = []
    if (data['stack'] and
        data['description'] not in perf_data[key_graph]['stack_order']):
      perf_data[key_graph]['stack_order'].append(data['description'])

    if data['graph_name'] != '_EVENT_' and not data['units_x']:
      # Short-running test result: last value wins.
      perf_data[key_graph]['value'][key_description] = data['value']
    else:
      # Long-running test result or event: concatenate point lists.
      if key_description in perf_data[key_graph]['value']:
        perf_data[key_graph]['value'][key_description] += data['value']
      else:
        perf_data[key_graph]['value'][key_description] = data['value']

  # Finally, for each graph-line in |perf_data|, update the associated local
  # graph data files if necessary.
  for perf_data_key in perf_data:
    perf_data_dict = perf_data[perf_data_key]

    dest_dir = os.path.join(graph_dir, perf_data_dict['webapp_name'])
    if not os.path.exists(dest_dir):
      os.mkdir(dest_dir)  # Webapp name directory.
      os.chmod(dest_dir, 0o755)
    dest_dir = os.path.join(dest_dir, perf_data_dict['test_name'])

    SetupBaseGraphDirIfNeeded(perf_data_dict['webapp_name'],
                              perf_data_dict['test_name'], dest_dir)
    if perf_data_dict['graph_name'] == '_EVENT_':
      OutputEventData(revision, perf_data_dict['value'], dest_dir)
    else:
      OutputPerfData(revision, perf_data_dict['graph_name'],
                     perf_data_dict['value'],
                     perf_data_dict['units'], perf_data_dict['units_x'],
                     dest_dir,
                     perf_data_dict['stack'], perf_data_dict['stack_order'])
def SlaveLocation(master_location, slave_info):
  """Returns the slave data location for |master_location| and |slave_info|.

  For an HTTP master, the slave name is URL-quoted onto the base URL; for a
  local master, the name is mangled (spaces/parens become '_') into a
  directory name.
  """
  if master_location.startswith('http://'):
    return master_location + urllib.quote(slave_info['slave_name'])
  else:
    return os.path.join(master_location,
                        slave_info['slave_name'].translate(MANGLE_TRANSLATION))
361 def GetRevisionAndLogs(slave_location, build_num):
362 """Get a revision number and log locations.
365 slave_location: A URL or a path to the build slave data.
366 build_num: A build number.
369 A pair of the revision number and a list of strings that contain locations
370 of logs. (False, []) in case of error.
372 if slave_location.startswith('http://'):
373 location = slave_location + '/builds/' + str(build_num)
375 location = os.path.join(slave_location, str(build_num))
381 if location.startswith('http://'):
382 fp = urllib2.urlopen(location)
384 revisions = re.findall(r'<td class="left">got_revision</td>\s+'
385 '<td>(\d+)</td>\s+<td>Source</td>', contents)
387 revision = revisions[0]
388 logs = [location + link + '/text' for link
389 in re.findall(r'(/steps/endure[^/]+/logs/stdio)', contents)]
391 fp = open(location, 'rb')
392 build = cPickle.load(fp)
393 properties = build.getProperties()
394 if properties.has_key('got_revision'):
395 revision = build.getProperty('got_revision')
396 candidates = os.listdir(slave_location)
397 logs = [os.path.join(slave_location, filename)
398 for filename in candidates
399 if re.match(r'%d-log-endure[^/]+-stdio' % build_num, filename)]
401 except urllib2.URLError, e:
402 logging.exception('Error reading build URL "%s": %s', location, str(e))
404 except (IOError, OSError), e:
405 logging.exception('Error reading build file "%s": %s', location, str(e))
411 return revision, logs
def ExtractTestNames(log_location, is_dbg):
  """Extracts the webapp and test names from |log_location|.

  Args:
    log_location: A URL or a local path to an endure step's stdio log.
    is_dbg: True if the log comes from a (dbg) builder; '_dbg' is then
        appended to the extracted webapp name.

  Returns:
    A dict of a log location, webapp's name and test's name.  False if error.
  """
  if log_location.startswith('http://'):
    location = urllib.unquote(log_location)
    test_pattern = r'endure_([^_]+)(_test |-)([^/]+)/'
  else:
    location = log_location
    test_pattern = r'endure_([^_]+)(_test_|-)([^/]+)-stdio'

  matches = re.findall(test_pattern, location)
  if not matches:
    # NOTE(review): the error-handling lines were elided in this copy of the
    # file; logging and returning False matches the documented contract.
    logging.error('Could not extract test names from location: %s',
                  log_location)
    return False

  match = matches[0]
  webapp_name = match[0]
  webapp_name = webapp_name + '_dbg' if is_dbg else webapp_name
  test_name = match[2]

  return {
      'location': log_location,
      'webapp_name': webapp_name,
      'test_name': test_name,
  }
def GetStdioContents(stdio_location):
  """Gets appropriate stdio contents.

  Returns:
    A content string of the stdio log.  None in case of error.
  """
  # NOTE(review): several statements of this function (buffer initialization,
  # the chunked read loop body, the netstring index bookkeeping and the error
  # returns) are elided in this copy of the file.  The comments below annotate
  # only the code that is present; gaps are NOT reconstructed here.
  if stdio_location.startswith('http://'):
    fp = urllib2.urlopen(stdio_location, timeout=60)
    # Since in-progress test output is sent chunked, there's no EOF.  We need
    # to specially handle this case so we don't hang here waiting for the
    # download to complete.
    start_time = time.time()
    # [elided: chunked read loop that accumulates |data|]
    if time.time() - start_time >= 30:  # Read for at most 30 seconds.
  # [elided: else branch — read the log from the local file system]
  fp = open(stdio_location)
  # Buildbot log files are stored in the netstring format.
  # http://en.wikipedia.org/wiki/Netstring
  # Each entry is a length-prefixed payload; only one channel's payloads are
  # concatenated into |contents|.
  while index < len(data):
    # Scan the decimal length prefix.
    while data[index2].isdigit():
    if data[index2] != ':':
      logging.error('Log file is not in expected format: %s' %
    length = int(data[index:index2])
    # The first payload byte identifies the buildbot log channel.
    channel = int(data[index])
    # Every netstring entry must terminate with a comma.
    if data[index+length-1] != ',':
      logging.error('Log file is not in expected format: %s' %
    contents += data[index:(index+length-1)]
  except (urllib2.URLError, socket.error, IOError, OSError), e:
    # Issue warning but continue to the next stdio link.
    logging.warning('Error reading test stdio data "%s": %s',
                    stdio_location, str(e))
def UpdatePerfDataForSlaveAndBuild(
    slave_info, build_num, graph_dir, master_location):
  """Process updated perf data for a particular slave and build number.

  Args:
    slave_info: A dictionary containing information about the slave to process.
    build_num: The particular build number on the slave to process.
    graph_dir: A path to the graph directory.
    master_location: A URL or a path to the build master data.

  Returns:
    True if the perf data for the given slave/build is updated properly, or
    False if any critical error occurred.
  """
  # NOTE(review): several error-handling branches of this function are elided
  # in this copy of the file; gap markers below flag where code is missing.
  if not master_location.startswith('http://'):
    # Deferred import: buildbot is only importable when running against a
    # local master checkout (sys.path is extended in the script entry point).
    from buildbot.status import builder
  slave_location = SlaveLocation(master_location, slave_info)
  logging.debug(' %s, build %d.', slave_info['slave_name'], build_num)
  is_dbg = '(dbg)' in slave_info['slave_name']
  revision, logs = GetRevisionAndLogs(slave_location, build_num)
  # [elided: presumably an early "return False" when |revision| is False]
  for log_location in logs:
    stdio = ExtractTestNames(log_location, is_dbg)
    # [elided: presumably error handling when |stdio| is False]
    stdio_location = stdio['location']
    contents = GetStdioContents(stdio_location)
    # [elided: presumably a skip/abort when |contents| is None]
    UpdatePerfDataFromFetchedContent(revision, contents,
                                     stdio['webapp_name'],
    # [elided: remaining call arguments — likely the test name, |graph_dir|,
    #  and an only_dmp flag — plus the final "return True"]
548 def GetMostRecentBuildNum(master_location, slave_name):
549 """Gets the most recent buld number for |slave_name| in |master_location|."""
550 most_recent_build_num = None
552 if master_location.startswith('http://'):
553 slave_url = master_location + urllib.quote(slave_name)
558 fp = urllib2.urlopen(slave_url, timeout=60)
559 url_contents = fp.read()
560 except urllib2.URLError, e:
561 logging.exception('Error reading builder URL: %s', str(e))
567 matches = re.findall(r'/(\d+)/stop', url_contents)
569 most_recent_build_num = int(matches[0])
571 matches = re.findall(r'#(\d+)</a></td>', url_contents)
573 most_recent_build_num = sorted(map(int, matches), reverse=True)[0]
576 slave_path = os.path.join(master_location,
577 slave_name.translate(MANGLE_TRANSLATION))
578 files = os.listdir(slave_path)
579 number_files = [int(filename) for filename in files if filename.isdigit()]
581 most_recent_build_num = sorted(number_files, reverse=True)[0]
583 if most_recent_build_num:
584 logging.debug('%s most recent build number: %s',
585 slave_name, most_recent_build_num)
587 logging.error('Could not identify latest build number for slave %s.',
590 return most_recent_build_num
def UpdatePerfDataFiles(graph_dir, master_location):
  """Updates the Chrome Endure graph data files with the latest test results.

  For each known Chrome Endure slave, we scan its latest test results looking
  for any new test data.  Any new data that is found is then appended to the
  data files used to display the Chrome Endure graphs.

  Args:
    graph_dir: A path to the graph directory.
    master_location: A URL or a path to the build master data.

  Returns:
    True if all graph data files are updated properly, or
    False if any error occurred.
  """
  slave_list = []
  for slave_name in CHROME_ENDURE_SLAVE_NAMES:
    slave_info = {}
    slave_info['slave_name'] = slave_name
    slave_info['most_recent_build_num'] = None
    slave_info['last_processed_build_num'] = None
    slave_list.append(slave_info)

  # Identify the most recent build number for each slave.
  logging.debug('Searching for latest build numbers for each slave...')
  for slave in slave_list:
    slave_name = slave['slave_name']
    slave['most_recent_build_num'] = GetMostRecentBuildNum(
        master_location, slave_name)

  # Identify the last-processed build number for each slave.
  logging.debug('Identifying last processed build numbers...')
  if not os.path.exists(LAST_BUILD_NUM_PROCESSED_FILE):
    for slave_info in slave_list:
      slave_info['last_processed_build_num'] = 0
  else:
    with open(LAST_BUILD_NUM_PROCESSED_FILE, 'r') as fp:
      file_contents = fp.read()
      # Each line of the state file is "<slave name>:<build number>".
      for match in re.findall(r'([^:]+):(\d+)', file_contents):
        slave_name = match[0].strip()
        last_processed_build_num = match[1].strip()
        for slave_info in slave_list:
          if slave_info['slave_name'] == slave_name:
            slave_info['last_processed_build_num'] = int(
                last_processed_build_num)
    # Any slave not mentioned in the state file starts from build 0.
    for slave_info in slave_list:
      if not slave_info['last_processed_build_num']:
        slave_info['last_processed_build_num'] = 0
  logging.debug('Done identifying last processed build numbers.')

  # For each Chrome Endure slave, process each build in-between the last
  # processed build num and the most recent build num, inclusive.  To process
  # each one, first get the revision number for that build, then scan the test
  # result stdio for any performance data, and add any new performance data to
  # local files to be graphed.
  for slave_info in slave_list:
    logging.debug('Processing %s, builds %d-%d...',
                  slave_info['slave_name'],
                  slave_info['last_processed_build_num'],
                  slave_info['most_recent_build_num'])
    curr_build_num = slave_info['last_processed_build_num']
    # NOTE: if |most_recent_build_num| is None (lookup failed), Python 2's
    # None < int ordering makes this loop a no-op for that slave.
    while curr_build_num <= slave_info['most_recent_build_num']:
      if not UpdatePerfDataForSlaveAndBuild(slave_info, curr_build_num,
                                            graph_dir, master_location):
        # Do not give up.  The first files might be removed by buildbot.
        logging.warning('Logs do not exist in buildbot for #%d of %s.' %
                        (curr_build_num, slave_info['slave_name']))
      curr_build_num += 1

  # Log the newly-processed build numbers.
  logging.debug('Logging the newly-processed build numbers...')
  with open(LAST_BUILD_NUM_PROCESSED_FILE, 'w') as f:
    for slave_info in slave_list:
      f.write('%s:%s\n' % (slave_info['slave_name'],
                           slave_info['most_recent_build_num']))

  return True
def GenerateIndexPage(graph_dir):
  """Generates a summary (landing) page for the Chrome Endure graphs.

  Args:
    graph_dir: A path to the graph directory.
  """
  logging.debug('Generating new index.html page...')

  # NOTE(review): most of the HTML template string literal and parts of the
  # page-assembly code are elided in this copy of the file; the fragments
  # below are annotated as-is and gaps are NOT reconstructed.
  # [elided: start of the HTML template string, including <html>/<head>]
  <title>Chrome Endure Overview</title>
  <script language="javascript">
  function DisplayGraph(name, graph) {
      '<td><iframe scrolling="no" height="438" width="700" src="');
    document.write(name);
    document.write('"></iframe></td>');
  # [elided: remainder of the template; the assembled string becomes |page|]

  # Print current time.
  page += '<p>Updated: %s</p>\n' % (
      time.strftime('%A, %B %d, %Y at %I:%M:%S %p %Z'))

  # Links for each webapp.
  webapp_names = [x for x in os.listdir(graph_dir) if
                  x not in ['js', 'old_data', '.svn', '.git'] and
                  os.path.isdir(os.path.join(graph_dir, x))]
  webapp_names = sorted(webapp_names)

  for i, name in enumerate(webapp_names):
    page += '<a href="#%s">%s</a>' % (name.upper(), name.upper())
    if i < len(webapp_names) - 1:
      # [elided: presumably appends a separator between the links]

  # Print out the data for each webapp.
  for webapp_name in webapp_names:
    page += '\n<h1 id="%s">%s</h1>\n' % (webapp_name.upper(),
    # [elided: continuation of the format arguments]

    # Links for each test for this webapp.
    test_names = [x for x in
                  os.listdir(os.path.join(graph_dir, webapp_name))]
    test_names = sorted(test_names)

    for i, name in enumerate(test_names):
      page += '<a href="#%s">%s</a>' % (name, name)
      if i < len(test_names) - 1:
        # [elided: presumably appends a separator between the links]

    # Print out the data for each test for this webapp.
    for test_name in test_names:
      # Get the set of graph names for this test.
      graph_names = [x[:x.find('-summary.dat')] for x in
                     os.listdir(os.path.join(graph_dir,
                                             webapp_name, test_name))
                     if '-summary.dat' in x and '_EVENT_' not in x]
      graph_names = sorted(graph_names)

      page += '<h2 id="%s">%s</h2>\n' % (test_name, test_name)

      for i, graph_name in enumerate(graph_names):
        page += ('  <script>DisplayGraph("%s/%s?graph=%s&lookout=1");'
                 '</script>\n' % (webapp_name, test_name, graph_name))

      # Presumably pads the two-column table when the last row holds an odd
      # number of graphs — body elided; confirm against upstream.
      if len(graph_names) % 2 == 1:

  # [elided: closing template markup appended to |page|]
  index_file = os.path.join(graph_dir, 'index.html')
  with open(index_file, 'w') as f:
    # [elided: presumably f.write(page)]
  os.chmod(index_file, 0755)
# NOTE(review): the enclosing function definition (likely "def Main():") and
# the heads of the parser.add_option(...) calls are elided in this copy of
# the file; the statements below form the script's command-line entry point
# and are annotated as-is, with gaps flagged rather than reconstructed.
parser = optparse.OptionParser()
# [elided: parser.add_option( — verbose logging flag]
    '-v', '--verbose', action='store_true', default=False,
    help='Use verbose logging.')
# [elided: parser.add_option( — read log content from stdin for testing]
    '-s', '--stdin', action='store_true', default=False,
    help='Input from stdin instead of slaves for testing this script.')
# [elided: parser.add_option( — buildbot location, URL or local path]
    '-b', '--buildbot', dest='buildbot', metavar="BUILDBOT",
    default=BUILDER_URL_BASE,
    help='Use log files in a buildbot at BUILDBOT. BUILDBOT can be a '
         'buildbot\'s builder URL or a local path to a buildbot directory. '
         'Both an absolute path and a relative path are available, e.g. '
         '"/home/chrome-bot/buildbot" or "../buildbot". '
         '[default: %default]')
# [elided: parser.add_option( — output directory for graph data files]
    '-g', '--graph', dest='graph_dir', metavar="DIR", default=LOCAL_GRAPH_DIR,
    help='Output graph data files to DIR. [default: %default]')
options, _ = parser.parse_args(sys.argv)

logging_level = logging.DEBUG if options.verbose else logging.INFO
logging.basicConfig(level=logging_level,
                    format='[%(asctime)s] %(levelname)s: %(message)s')

# [elided: presumably "if options.stdin:" — test mode parses stdin with a
#  dummy revision/webapp/test instead of scraping slaves]
    content = sys.stdin.read()
    UpdatePerfDataFromFetchedContent(
        '12345', content, 'webapp', 'test', options.graph_dir)
# [elided: presumably "else:"]
    if options.buildbot.startswith('http://'):
      master_location = options.buildbot
    # [elided: presumably "else:" — local buildbot checkout]
      build_dir = os.path.join(options.buildbot, 'build')
      third_party_dir = os.path.join(build_dir, 'third_party')
      # Make buildbot's bundled third_party packages importable for the
      # local-path code path (see UpdatePerfDataForSlaveAndBuild).
      sys.path.append(third_party_dir)
      sys.path.append(os.path.join(third_party_dir, 'buildbot_8_4p1'))
      sys.path.append(os.path.join(third_party_dir, 'twisted_10_2'))
      master_location = os.path.join(build_dir, 'masters',
                                     'master.chromium.endure')
    success = UpdatePerfDataFiles(options.graph_dir, master_location)
    # [elided: presumably "if not success:" with an error exit]
      logging.error('Failed to update perf data files.')

  GenerateIndexPage(options.graph_dir)
  logging.debug('All done!')
823 if __name__ == '__main__':