From 2a1704f362faf03e899dbe86a269e224b8b5b29f Mon Sep 17 00:00:00 2001 From: "commit-bot@chromium.org" Date: Tue, 25 Mar 2014 18:02:17 +0000 Subject: [PATCH] Further tweak compare_codereview.py. - only show details for bots where at least one side failed - dedup identical failures BUG=skia: R=halcanary@google.com, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/202113008 git-svn-id: http://skia.googlecode.com/svn/trunk@13942 2bbb7eff-a529-9590-31e7-b0007b416f81 --- tools/compare_codereview.py | 711 +++++++++++++++++++++++--------------------- 1 file changed, 365 insertions(+), 346 deletions(-) diff --git a/tools/compare_codereview.py b/tools/compare_codereview.py index b39bfe2..a58b3c6 100755 --- a/tools/compare_codereview.py +++ b/tools/compare_codereview.py @@ -23,373 +23,392 @@ import HTMLParser class CodeReviewHTMLParser(HTMLParser.HTMLParser): - """Parses CodeReview web page. + """Parses CodeReview web page. - Use the CodeReviewHTMLParser.parse static function to make use of - this class. + Use the CodeReviewHTMLParser.parse static function to make use of + this class. - This uses the HTMLParser class because it's the best thing in - Python's standard library. We need a little more power than a - regex. [Search for "You can't parse [X]HTML with regex." for more - information. + This uses the HTMLParser class because it's the best thing in + Python's standard library. We need a little more power than a + regex. [Search for "You can't parse [X]HTML with regex." for more + information. + """ + # pylint: disable=I0011,R0904 + @staticmethod + def parse(url): + """Parses a CodeReview web pages. + + Args: + url (string), a codereview URL like this: + 'https://codereview.chromium.org/?????????'. + + Returns: + A dictionary; the keys are bot_name strings, the values + are CodeReviewHTMLParser.Status objects + """ + parser = CodeReviewHTMLParser() + try: + parser.feed(urllib2.urlopen(url).read()) + except (urllib2.URLError,): + print >> sys.stderr, 'Error getting', url + return None + parser.close() + return parser.statuses + + # namedtuples are like lightweight structs in Python. The low + # overhead of a tuple, but the ease of use of an object. + Status = collections.namedtuple('Status', ['status', 'url']) + + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self._id = None + self._status = None + self._href = None + self._anchor_data = '' + self._currently_parsing_trybotdiv = False + # statuses is a dictionary of CodeReviewHTMLParser.Status + self.statuses = {} + + def handle_starttag(self, tag, attrs): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the start of a tag + (e.g.
). + + The tag argument is the name of the tag converted to lower + case. The attrs argument is a list of (name, value) pairs + containing the attributes found inside the tag's <> + brackets. The name will be translated to lower case, and + quotes in the value have been removed, and character and + entity references have been replaced. + + For instance, for the tag , this + method would be called as handle_starttag('a', [('href', + 'http://www.cwi.nl/')]). + [[end standard library documentation]] """ - # pylint: disable=I0011,R0904 - @staticmethod - def parse(url): - """Parses a CodeReview web pages. - - Args: - url (string), a codereview URL like this: - 'https://codereview.chromium.org/?????????'. - - Returns: - A dictionary; the keys are bot_name strings, the values - are CodeReviewHTMLParser.Status objects - """ - parser = CodeReviewHTMLParser() - try: - parser.feed(urllib2.urlopen(url).read()) - except (urllib2.URLError,): - print >> sys.stderr, 'Error getting', url - return None - parser.close() - return parser.statuses - - # namedtuples are like lightweight structs in Python. The low - # overhead of a tuple, but the ease of use of an object. - Status = collections.namedtuple('Status', ['status', 'url']) - - def __init__(self): - HTMLParser.HTMLParser.__init__(self) - self._id = None - self._status = None - self._href = None - self._anchor_data = '' - self._currently_parsing_trybotdiv = False - # statuses is a dictionary of CodeReviewHTMLParser.Status - self.statuses = {} - - def handle_starttag(self, tag, attrs): - """Overrides the HTMLParser method to implement functionality. - - [[begin standard library documentation]] - This method is called to handle the start of a tag - (e.g.
). - - The tag argument is the name of the tag converted to lower - case. The attrs argument is a list of (name, value) pairs - containing the attributes found inside the tag's <> - brackets. The name will be translated to lower case, and - quotes in the value have been removed, and character and - entity references have been replaced. - - For instance, for the tag , this - method would be called as handle_starttag('a', [('href', - 'http://www.cwi.nl/')]). - [[end standard library documentation]] - """ - attrs = dict(attrs) - if tag == 'div': - # We are looking for
. - id_attr = attrs.get('id','') - if id_attr.startswith('tryjobdiv'): - self._id = id_attr - if (self._id and tag == 'a' - and 'build-result' in attrs.get('class', '').split()): - # If we are already inside a ). The tag argument is the name of the tag - converted to lower case. - [[end standard library documentation]] - """ - if tag == 'a' and self._status: - # We take the accumulated self._anchor_data and save it as - # the bot name. - bot = self._anchor_data.strip() - stat = CodeReviewHTMLParser.Status(status=self._status, - url=self._href) - if bot: - # Add to accumulating dictionary. - self.statuses[bot] = stat - # Reset state to search for the next bot. - self._currently_parsing_trybotdiv = False - self._anchor_data = '' - self._status = None - self._href = None + attrs = dict(attrs) + if tag == 'div': + # We are looking for
. + id_attr = attrs.get('id','') + if id_attr.startswith('tryjobdiv'): + self._id = id_attr + if (self._id and tag == 'a' + and 'build-result' in attrs.get('class', '').split()): + # If we are already inside a ). The tag argument is the name of the tag + converted to lower case. + [[end standard library documentation]] + """ + if tag == 'a' and self._status: + # We take the accumulated self._anchor_data and save it as + # the bot name. + bot = self._anchor_data.strip() + stat = CodeReviewHTMLParser.Status(status=self._status, + url=self._href) + if bot: + # Add to accumulating dictionary. + self.statuses[bot] = stat + # Reset state to search for the next bot. + self._currently_parsing_trybotdiv = False + self._anchor_data = '' + self._status = None + self._href = None class BuilderHTMLParser(HTMLParser.HTMLParser): - """parses Trybot web pages. + """parses Trybot web pages. - Use the BuilderHTMLParser.parse static function to make use of - this class. - - This uses the HTMLParser class because it's the best thing in - Python's standard library. We need a little more power than a - regex. [Search for "You can't parse [X]HTML with regex." for more - information. - """ - # pylint: disable=I0011,R0904 - @staticmethod - def parse(url): - """Parses a Trybot web page. - - Args: - url (string), a trybot result URL. - - Returns: - An array of BuilderHTMLParser.Results, each a description - of failure results, along with an optional url - """ - parser = BuilderHTMLParser() - try: - parser.feed(urllib2.urlopen(url).read()) - except (urllib2.URLError,): - print >> sys.stderr, 'Error getting', url - return [] - parser.close() - return parser.failure_results - - Result = collections.namedtuple('Result', ['text', 'url']) - - def __init__(self): - HTMLParser.HTMLParser.__init__(self) - self.failure_results = [] - self._current_failure_result = None - self._divlevel = None - self._li_level = 0 - self._li_data = '' - self._current_failure = False - self._failure_results_url = '' + Use the BuilderHTMLParser.parse static function to make use of + this class. - def handle_starttag(self, tag, attrs): - """Overrides the HTMLParser method to implement functionality. - - [[begin standard library documentation]] - This method is called to handle the start of a tag - (e.g.
). - - The tag argument is the name of the tag converted to lower - case. The attrs argument is a list of (name, value) pairs - containing the attributes found inside the tag's <> - brackets. The name will be translated to lower case, and - quotes in the value have been removed, and character and - entity references have been replaced. - - For instance, for the tag , this - method would be called as handle_starttag('a', [('href', - 'http://www.cwi.nl/')]). - [[end standard library documentation]] - """ - attrs = dict(attrs) - if tag == 'li': - #
  • tags can be nested. So we have to count the - # nest-level for backing out. - self._li_level += 1 - return - if tag == 'div' and attrs.get('class') == 'failure result': - # We care about this sort of thing: - #
  • - #
  • - #
  • - #
    ...
    - #
  • - # - # We want this text here. - # - if self._li_level > 0: - self._current_failure = True # Tells us to keep text. - return - - if tag == 'a' and self._current_failure: - href = attrs.get('href') - # Sometimes we want to keep the stdio url. We always - # return it, just in case. - if href.endswith('/logs/stdio'): - self._failure_results_url = href - - def handle_data(self, data): - """Overrides the HTMLParser method to implement functionality. - - [[begin standard library documentation]] - This method is called to process arbitrary data (e.g. text - nodes and the content of and - ). - [[end standard library documentation]] - """ - if self._current_failure: - self._li_data += data - - def handle_endtag(self, tag): - """Overrides the HTMLParser method to implement functionality. - - [[begin standard library documentation]] - This method is called to handle the end tag of an element - (e.g.
    ). The tag argument is the name of the tag - converted to lower case. - [[end standard library documentation]] - """ - if tag == 'li': - self._li_level -= 1 - if 0 == self._li_level: - if self._current_failure: - result = self._li_data.strip() - first = result.split()[0] - if first: - result = re.sub( - r'^%s(\s+%s)+' % (first, first), first, result) - # Sometimes, it repeats the same thing - # multiple times. - result = re.sub(r'unexpected flaky.*', '', result) - # Remove some extra unnecessary text. - result = re.sub(r'\bpreamble\b', '', result) - result = re.sub(r'\bstdio\b', '', result) - url = self._failure_results_url - self.failure_results.append( - BuilderHTMLParser.Result(result, url)) - self._current_failure_result = None - # Reset the state. - self._current_failure = False - self._li_data = '' - self._failure_results_url = '' + This uses the HTMLParser class because it's the best thing in + Python's standard library. We need a little more power than a + regex. [Search for "You can't parse [X]HTML with regex." for more + information. + """ + # pylint: disable=I0011,R0904 + @staticmethod + def parse(url): + """Parses a Trybot web page. + Args: + url (string), a trybot result URL. -def printer(indent, string): - """Print indented, wrapped text. + Returns: + An array of BuilderHTMLParser.Results, each a description + of failure results, along with an optional url """ - def wrap_to(line, columns): - """Wrap a line to the given number of columns, return a list - of strings. - """ - ret = [] - nextline = '' - for word in line.split(): - if nextline: - if len(nextline) + 1 + len(word) > columns: - ret.append(nextline) - nextline = word - else: - nextline += (' ' + word) - else: - nextline = word - if nextline: - ret.append(nextline) - return ret - out = sys.stdout - spacer = ' ' - for line in string.split('\n'): - for i, wrapped_line in enumerate(wrap_to(line, 68 - (2 * indent))): - out.write(spacer * indent) - if i > 0: - out.write(spacer) - out.write(wrapped_line) - out.write('\n') - out.flush() + parser = BuilderHTMLParser() + try: + parser.feed(urllib2.urlopen(url).read()) + except (urllib2.URLError,): + print >> sys.stderr, 'Error getting', url + return [] + parser.close() + return parser.failure_results + + Result = collections.namedtuple('Result', ['text', 'url']) + + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self.failure_results = [] + self._current_failure_result = None + self._divlevel = None + self._li_level = 0 + self._li_data = '' + self._current_failure = False + self._failure_results_url = '' + + def handle_starttag(self, tag, attrs): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the start of a tag + (e.g.
    ). + + The tag argument is the name of the tag converted to lower + case. The attrs argument is a list of (name, value) pairs + containing the attributes found inside the tag's <> + brackets. The name will be translated to lower case, and + quotes in the value have been removed, and character and + entity references have been replaced. + + For instance, for the tag , this + method would be called as handle_starttag('a', [('href', + 'http://www.cwi.nl/')]). + [[end standard library documentation]] + """ + attrs = dict(attrs) + if tag == 'li': + #
  • tags can be nested. So we have to count the + # nest-level for backing out. + self._li_level += 1 + return + if tag == 'div' and attrs.get('class') == 'failure result': + # We care about this sort of thing: + #
  • + #
  • + #
  • + #
    ...
    + #
  • + # + # We want this text here. + # + if self._li_level > 0: + self._current_failure = True # Tells us to keep text. + return + + if tag == 'a' and self._current_failure: + href = attrs.get('href') + # Sometimes we want to keep the stdio url. We always + # return it, just in case. + if href.endswith('/logs/stdio'): + self._failure_results_url = href + + def handle_data(self, data): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to process arbitrary data (e.g. text + nodes and the content of and + ). + [[end standard library documentation]] + """ + if self._current_failure: + self._li_data += data + def handle_endtag(self, tag): + """Overrides the HTMLParser method to implement functionality. -def main(control_url, roll_url, verbosity=1): - """Compare two Codereview URLs + [[begin standard library documentation]] + This method is called to handle the end tag of an element + (e.g.
    ). The tag argument is the name of the tag + converted to lower case. + [[end standard library documentation]] + """ + if tag == 'li': + self._li_level -= 1 + if 0 == self._li_level: + if self._current_failure: + result = self._li_data.strip() + first = result.split()[0] + if first: + result = re.sub( + r'^%s(\s+%s)+' % (first, first), first, result) + # Sometimes, it repeats the same thing + # multiple times. + result = re.sub(r'unexpected flaky.*', '', result) + # Remove some extra unnecessary text. + result = re.sub(r'\bpreamble\b', '', result) + result = re.sub(r'\bstdio\b', '', result) + url = self._failure_results_url + self.failure_results.append( + BuilderHTMLParser.Result(result, url)) + self._current_failure_result = None + # Reset the state. + self._current_failure = False + self._li_data = '' + self._failure_results_url = '' - Args: - control_url, roll_url: (strings) URL of the format - https://codereview.chromium.org/????????? - verbosity: (int) verbose level. 0, 1, or 2. +def printer(indent, string): + """Print indented, wrapped text. + """ + def wrap_to(line, columns): + """Wrap a line to the given number of columns, return a list + of strings. """ - # pylint: disable=I0011,R0914,R0912 - control = CodeReviewHTMLParser.parse(control_url) - roll = CodeReviewHTMLParser.parse(roll_url) - all_bots = set(control) & set(roll) # Set intersection. - if not all_bots: - print >> sys.stderr, ( - 'Error: control %s and roll %s have no common trybots.' - % (list(control), list(roll))) - return - - control_name = '[control %s]' % control_url.split('/')[-1] - roll_name = '[roll %s]' % roll_url.split('/')[-1] + ret = [] + nextline = '' + for word in line.split(): + if nextline: + if len(nextline) + 1 + len(word) > columns: + ret.append(nextline) + nextline = word + else: + nextline += (' ' + word) + else: + nextline = word + if nextline: + ret.append(nextline) + return ret + out = sys.stdout + spacer = ' ' + for line in string.split('\n'): + for i, wrapped_line in enumerate(wrap_to(line, 68 - (2 * indent))): + out.write(spacer * indent) + if i > 0: + out.write(spacer) + out.write(wrapped_line) + out.write('\n') + out.flush() - out = sys.stdout - for bot in sorted(all_bots): - if (roll[bot].status == 'success'): - if verbosity > 1: - printer(0, '==%s==' % bot) - printer(1, 'OK') - continue +def main(control_url, roll_url, verbosity=1): + """Compare two Codereview URLs + + Args: + control_url, roll_url: (strings) URL of the format + https://codereview.chromium.org/????????? + + verbosity: (int) verbose level. 0, 1, or 2. + """ + # pylint: disable=I0011,R0914,R0912 + control = CodeReviewHTMLParser.parse(control_url) + roll = CodeReviewHTMLParser.parse(roll_url) + all_bots = set(control) & set(roll) # Set intersection. + if not all_bots: + print >> sys.stderr, ( + 'Error: control %s and roll %s have no common trybots.' + % (list(control), list(roll))) + return + + control_name = '[control %s]' % control_url.split('/')[-1] + roll_name = '[roll %s]' % roll_url.split('/')[-1] + + out = sys.stdout + + for bot in sorted(all_bots): + if (roll[bot].status == 'success'): + if verbosity > 1: printer(0, '==%s==' % bot) + printer(1, 'OK') + continue - for (status, name, url) in ( + if control[bot].status != 'failure' and roll[bot].status != 'failure': + continue + printer(0, '==%s==' % bot) + + formatted_results = [] + for (status, name, url) in [ (control[bot].status, control_name, control[bot].url), - (roll[bot].status, roll_name, roll[bot].url)): - - if status == 'failure': - printer(1, name) - results = BuilderHTMLParser.parse(url) - for result in results: - formatted_result = re.sub( - r'(\S*\.html) ', '\n__\g<1>\n', result.text) - # Strip runtimes. - formatted_result = re.sub(r'\(.*\)', '', formatted_result) - printer(2, formatted_result) - if ('compile' in result.text - or '...and more' in result.text): - printer(3, re.sub('/[^/]*$', '/', url) + result.url) - else: - printer(1, name) - printer(2, status) - out.write('\n') - - if verbosity > 0: - # Print out summary of all of the bots. - out.write('%11s %11s %4s %s\n\n' % - ('CONTROL', 'ROLL', 'DIFF', 'BOT')) - for bot in sorted(all_bots): - if roll[bot].status == 'success': - diff = '' - elif (control[bot].status == 'success' and - roll[bot].status == 'failure'): - diff = '!!!!' - elif ('pending' in control[bot].status or - 'pending' in roll[bot].status): - diff = '....' - else: - diff = '****' - out.write('%11s %11s %4s %s\n' % ( - control[bot].status, roll[bot].status, diff, bot)) - out.write('\n') - out.flush() + ( roll[bot].status, roll_name, roll[bot].url)]: + lines = [] + if status == 'failure': + results = BuilderHTMLParser.parse(url) + for result in results: + formatted_result = re.sub(r'(\S*\.html) ', '\n__\g<1>\n', result.text) + # Strip runtimes. + formatted_result = re.sub(r'\(.*\)', '', formatted_result) + lines.append((2, formatted_result)) + if ('compile' in result.text or '...and more' in result.text): + lines.append((3, re.sub('/[^/]*$', '/', url) + result.url)) + formatted_results.append(lines) + + identical = formatted_results[0] == formatted_results[1] + + + for (formatted_result, (status, name, url)) in zip( + formatted_results, + [(control[bot].status, control_name, control[bot].url), + (roll[bot].status, roll_name, roll[bot].url)]): + if status != 'failure' and not identical: + printer(1, name) + printer(2, status) + elif status == 'failure': + if identical: + printer(1, control_name + ' and ' + roll_name + ' failed identically') + else: + printer(1, name) + for (indent, line) in formatted_result: + printer(indent, line) + if identical: + break + out.write('\n') + + if verbosity > 0: + # Print out summary of all of the bots. + out.write('%11s %11s %4s %s\n\n' % + ('CONTROL', 'ROLL', 'DIFF', 'BOT')) + for bot in sorted(all_bots): + if roll[bot].status == 'success': + diff = '' + elif (control[bot].status == 'success' and + roll[bot].status == 'failure'): + diff = '!!!!' + elif ('pending' in control[bot].status or + 'pending' in roll[bot].status): + diff = '....' + else: + diff = '****' + out.write('%11s %11s %4s %s\n' % ( + control[bot].status, roll[bot].status, diff, bot)) + out.write('\n') + out.flush() if __name__ == '__main__': - if len(sys.argv) < 3: - print >> sys.stderr, __doc__ - exit(1) - main(sys.argv[1], sys.argv[2], - int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1))) + if len(sys.argv) < 3: + print >> sys.stderr, __doc__ + exit(1) + main(sys.argv[1], sys.argv[2], + int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1))) -- 2.7.4