[release-tools] Tool to find related commits
author: hablich <hablich@chromium.org>
Mon, 4 May 2015 12:20:10 +0000 (05:20 -0700)
committer: Commit bot <commit-bot@chromium.org>
Mon, 4 May 2015 12:20:16 +0000 (12:20 +0000)
usage: This tool analyzes the commit range between <of> and <until>. It finds commits which belong together e.g. Implement/Revert pairs and Implement/Port/Revert triples. All supplied hashes need to be from the same branch e.g. master.

Example for M42: ./search_related_commits.py --prettyprint --separator e0110920d6f98f0ba2ac0d680f635ae3f094a04e b856e8785933a2a9cd884ab8966fee0e7098927e b1c2a3495624a9776c7df865d972886f2d078c10

BUG=
NOTRY=true

Review URL: https://codereview.chromium.org/1098123002

Cr-Commit-Position: refs/heads/master@{#28197}

tools/release/search_related_commits.py [new file with mode: 0755]
tools/release/test_search_related_commits.py [new file with mode: 0755]

diff --git a/tools/release/search_related_commits.py b/tools/release/search_related_commits.py
new file mode 100755 (executable)
index 0000000..aae2584
--- /dev/null
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+# Copyright 2015 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import argparse
+import operator
+import os
+import re
+from sets import Set
+from subprocess import Popen, PIPE
+import sys
+
+def search_all_related_commits(
+    git_working_dir, start_hash, until, separator, verbose=False):
+
+  all_commits_raw = _find_commits_inbetween(
+      start_hash, until, git_working_dir, verbose)
+  if verbose:
+    print "All commits between <of> and <until>: " + all_commits_raw
+
+  # Adding start hash too
+  all_commits = [start_hash]
+  all_commits.extend(all_commits_raw.splitlines())
+  all_related_commits = {}
+  already_treated_commits = Set([])
+  for commit in all_commits:
+    if commit in already_treated_commits:
+      continue
+
+    related_commits = _search_related_commits(
+        git_working_dir, commit, until, separator, verbose)
+    if len(related_commits) > 0:
+      all_related_commits[commit] = related_commits
+      already_treated_commits.update(related_commits)
+
+    already_treated_commits.update(commit)
+
+  return all_related_commits
+
+def _search_related_commits(
+    git_working_dir, start_hash, until, separator, verbose=False):
+
+  if separator:
+    commits_between = _find_commits_inbetween(
+        start_hash, separator, git_working_dir, verbose)
+    if commits_between == "":
+      return []
+
+  # Extract commit position
+  original_message = _git_execute(
+      git_working_dir,
+      ["show", "-s", "--format=%B", start_hash],
+      verbose)
+  title = original_message.splitlines()[0]
+
+  matches = re.search("(\{#)([0-9]*)(\})", original_message)
+
+  if not matches:
+    return []
+
+  commit_position = matches.group(2)
+  if verbose:
+    print "1.) Commit position to look for: " + commit_position
+
+  search_range = start_hash + ".." + until
+
+  def git_args(grep_pattern):
+    return [
+      "log",
+      "--reverse",
+      "--grep=" + grep_pattern,
+      "--format=%H",
+      search_range,
+    ]
+
+  found_by_hash = _git_execute(
+      git_working_dir, git_args(start_hash), verbose).strip()
+
+  if verbose:
+    print "2.) Found by hash: " + found_by_hash
+
+  found_by_commit_pos = _git_execute(
+      git_working_dir, git_args(commit_position), verbose).strip()
+
+  if verbose:
+    print "3.) Found by commit position: " + found_by_commit_pos
+
+  # Replace brackets or else they are wrongly interpreted by --grep
+  title = title.replace("[", "\\[")
+  title = title.replace("]", "\\]")
+
+  found_by_title = _git_execute(
+      git_working_dir, git_args(title), verbose).strip()
+
+  if verbose:
+    print "4.) Found by title: " + found_by_title
+
+  hits = (
+      _convert_to_array(found_by_hash) +
+      _convert_to_array(found_by_commit_pos) +
+      _convert_to_array(found_by_title))
+  hits = _remove_duplicates(hits)
+
+  if separator:
+    for current_hit in hits:
+      commits_between = _find_commits_inbetween(
+          separator, current_hit, git_working_dir, verbose)
+      if commits_between != "":
+        return hits
+    return []
+
+  return hits
+
+def _find_commits_inbetween(start_hash, end_hash, git_working_dir, verbose):
+  commits_between = _git_execute(
+        git_working_dir,
+        ["rev-list", "--reverse", start_hash + ".." + end_hash],
+        verbose)
+  return commits_between.strip()
+
+def _convert_to_array(string_of_hashes):
+  return string_of_hashes.splitlines()
+
+def _remove_duplicates(array):
+   no_duplicates = []
+   for current in array:
+    if not current in no_duplicates:
+      no_duplicates.append(current)
+   return no_duplicates
+
+def _git_execute(working_dir, args, verbose=False):
+  command = ["git", "-C", working_dir] + args
+  if verbose:
+    print "Git working dir: " + working_dir
+    print "Executing git command:" + str(command)
+  p = Popen(args=command, stdin=PIPE,
+            stdout=PIPE, stderr=PIPE)
+  output, err = p.communicate()
+  rc = p.returncode
+  if rc != 0:
+    raise Exception(err)
+  if verbose:
+    print "Git return value: " + output
+  return output
+
+def _pretty_print_entry(hash, git_dir, pre_text, verbose):
+  text_to_print = _git_execute(
+      git_dir,
+      ["show",
+       "--quiet",
+       "--date=iso",
+       hash,
+       "--format=%ad # %H # %s"],
+      verbose)
+  return pre_text + text_to_print.strip()
+
+def main(options):
+    all_related_commits = search_all_related_commits(
+        options.git_dir,
+        options.of[0],
+        options.until[0],
+        options.separator,
+        options.verbose)
+
+    sort_key = lambda x: (
+        _git_execute(
+            options.git_dir,
+            ["show", "--quiet", "--date=iso", x, "--format=%ad"],
+            options.verbose)).strip()
+
+    high_level_commits = sorted(all_related_commits.keys(), key=sort_key)
+
+    for current_key in high_level_commits:
+      if options.prettyprint:
+        yield _pretty_print_entry(
+            current_key,
+            options.git_dir,
+            "+",
+            options.verbose)
+      else:
+        yield "+" + current_key
+
+      found_commits = all_related_commits[current_key]
+      for current_commit in found_commits:
+        if options.prettyprint:
+          yield _pretty_print_entry(
+              current_commit,
+              options.git_dir,
+              "| ",
+              options.verbose)
+        else:
+          yield "| " + current_commit
+
+if __name__ == "__main__":  # pragma: no cover
+  parser = argparse.ArgumentParser(
+      "This tool analyzes the commit range between <of> and <until>. "
+      "It finds commits which belong together e.g. Implement/Revert pairs and "
+      "Implement/Port/Revert triples. All supplied hashes need to be "
+      "from the same branch e.g. master.")
+  parser.add_argument("-g", "--git-dir", required=False, default=".",
+                        help="The path to your git working directory.")
+  parser.add_argument("--verbose", action="store_true",
+      help="Enables a very verbose output")
+  parser.add_argument("of", nargs=1,
+      help="Hash of the commit to be searched.")
+  parser.add_argument("until", nargs=1,
+      help="Commit when searching should stop")
+  parser.add_argument("--separator", required=False,
+      help="The script will only list related commits "
+            "which are separated by hash <--separator>.")
+  parser.add_argument("--prettyprint", action="store_true",
+      help="Pretty prints the output")
+
+  args = sys.argv[1:]
+  options = parser.parse_args(args)
+  for current_line in main(options):
+    print current_line
diff --git a/tools/release/test_search_related_commits.py b/tools/release/test_search_related_commits.py
new file mode 100755 (executable)
index 0000000..cf61236
--- /dev/null
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+# Copyright 2015 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from collections import namedtuple
+from os import path
+import search_related_commits
+import shutil
+from subprocess import Popen, PIPE, check_call
+import unittest
+
+
+# Settings for the tests in this file. GIT_REPO is the directory in which the
+# scratch git repository is created; setUp and tearDown delete this directory
+# if it exists.
+TEST_CONFIG = {
+  "GIT_REPO": "/tmp/test-v8-search-related-commits",
+}
+
+class TestSearchRelatedCommits(unittest.TestCase):
+
+  base_dir = TEST_CONFIG["GIT_REPO"]
+
+  def _execute_git(self, git_args):
+
+    fullCommand = ["git", "-C", self.base_dir] + git_args
+    p = Popen(args=fullCommand, stdin=PIPE,
+        stdout=PIPE, stderr=PIPE)
+    output, err = p.communicate()
+    rc = p.returncode
+    if rc != 0:
+      raise Exception(err)
+    return output
+
+  def setUp(self):
+    if path.exists(self.base_dir):
+      shutil.rmtree(self.base_dir)
+
+    check_call(["git", "init", self.base_dir])
+
+    # Initial commit
+    message = """[turbofan] Sanitize language mode for javascript operators.
+
+    R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28059}"""
+    self._make_empty_commit(message)
+
+    message = """[crankshaft] Do some stuff
+
+    R=hablich@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243007
+
+    Cr-Commit-Position: refs/heads/master@{#28030}"""
+
+    self._make_empty_commit(message)
+
+  def tearDown(self):
+    if path.exists(self.base_dir):
+      shutil.rmtree(self.base_dir)
+
+  def _assert_correct_standard_result(
+      self, result, all_commits, hash_of_first_commit):
+    self.assertEqual(len(result), 1, "Master commit not found")
+    self.assertTrue(
+        result.get(hash_of_first_commit),
+        "Master commit is wrong")
+
+    self.assertEqual(
+        len(result[hash_of_first_commit]),
+        1,
+        "Child commit not found")
+    self.assertEqual(
+        all_commits[2],
+        result[hash_of_first_commit][0],
+        "Child commit wrong")
+
+  def _get_commits(self):
+    commits = self._execute_git(
+        ["log", "--format=%H", "--reverse"]).splitlines()
+    return commits
+
+  def _make_empty_commit(self, message):
+    self._execute_git(["commit", "--allow-empty", "-m", message])
+
+  def testSearchByCommitPosition(self):
+    message = """Revert of some stuff.
+    > Cr-Commit-Position: refs/heads/master@{#28059}
+    R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28088}"""
+
+    self._make_empty_commit(message)
+
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+
+    result = search_related_commits.search_all_related_commits(
+        self.base_dir, hash_of_first_commit, "HEAD", None)
+
+    self._assert_correct_standard_result(result, commits, hash_of_first_commit)
+
+  def testSearchByTitle(self):
+    message = """Revert of some stuff.
+    > [turbofan] Sanitize language mode for javascript operators.
+    > Cr-Commit-Position: refs/heads/master@{#289}
+    R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28088}"""
+
+    self._make_empty_commit(message)
+
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+
+    result = search_related_commits.search_all_related_commits(
+        self.base_dir, hash_of_first_commit, "HEAD", None)
+
+    self._assert_correct_standard_result(result, commits, hash_of_first_commit)
+
+  def testSearchByHash(self):
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+
+    message = """Revert of some stuff.
+    > [turbofan] Sanitize language mode for javascript operators.
+    > Reverting """ + hash_of_first_commit + """
+    > R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28088}"""
+
+    self._make_empty_commit(message)
+
+    #Fetch again for an update
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+
+    result = search_related_commits.search_all_related_commits(
+        self.base_dir,
+        hash_of_first_commit,
+        "HEAD",
+        None)
+
+    self._assert_correct_standard_result(result, commits, hash_of_first_commit)
+
+  def testConsiderSeparator(self):
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+
+    # Related commits happen before separator so it is not a hit
+    message = """Revert of some stuff: Not a hit
+    > [turbofan] Sanitize language mode for javascript operators.
+    > Reverting """ + hash_of_first_commit + """
+    > R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28088}"""
+    self._make_empty_commit(message)
+
+    # Related commits happen before and after separator so it is a hit
+    commit_pos_of_master = "27088"
+    message = """Implement awesome feature: Master commit
+
+    Review URL: https://codereview.chromium.org/1084243235
+
+    Cr-Commit-Position: refs/heads/master@{#""" + commit_pos_of_master + "}"
+    self._make_empty_commit(message)
+
+    # Separator commit
+    message = """Commit which is the origin of the branch
+
+    Review URL: https://codereview.chromium.org/1084243456
+
+    Cr-Commit-Position: refs/heads/master@{#28173}"""
+    self._make_empty_commit(message)
+
+    # Filler commit
+    message = "Some unrelated commit: Not a hit"
+    self._make_empty_commit(message)
+
+    # Related commit after separator: a hit
+    message = "Patch r" + commit_pos_of_master +""" done
+
+    Review URL: https://codereview.chromium.org/1084243235
+
+    Cr-Commit-Position: refs/heads/master@{#29567}"""
+    self._make_empty_commit(message)
+
+    #Fetch again for an update
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+    hash_of_hit = commits[3]
+    hash_of_separator = commits[4]
+    hash_of_child_hit = commits[6]
+
+    result = search_related_commits.search_all_related_commits(
+        self.base_dir,
+        hash_of_first_commit,
+        "HEAD",
+        hash_of_separator)
+
+    self.assertTrue(result.get(hash_of_hit), "Hit not found")
+    self.assertEqual(len(result), 1, "More than one hit found")
+    self.assertEqual(
+        len(result.get(hash_of_hit)),
+        1,
+        "More than one child hit found")
+    self.assertEqual(
+        result.get(hash_of_hit)[0],
+        hash_of_child_hit,
+        "Wrong commit found")
+
+  def testPrettyPrint(self):
+    message = """Revert of some stuff.
+    > [turbofan] Sanitize language mode for javascript operators.
+    > Cr-Commit-Position: refs/heads/master@{#289}
+    R=mstarzinger@chromium.org
+
+    Review URL: https://codereview.chromium.org/1084243005
+
+    Cr-Commit-Position: refs/heads/master@{#28088}"""
+
+    self._make_empty_commit(message)
+
+    commits = self._get_commits()
+    hash_of_first_commit = commits[0]
+    OptionsStruct = namedtuple(
+        "OptionsStruct",
+        "git_dir of until all prettyprint separator verbose")
+    options = OptionsStruct(
+        git_dir= self.base_dir,
+        of= [hash_of_first_commit],
+        until= [commits[2]],
+        all= True,
+        prettyprint= True,
+        separator = None,
+        verbose=False)
+    output = []
+    for current_line in search_related_commits.main(options):
+      output.append(current_line)
+
+    self.assertIs(len(output), 2, "Not exactly two entries written")
+    self.assertTrue(output[0].startswith("+"), "Master entry not marked with +")
+    self.assertTrue(output[1].startswith("| "), "Child entry not marked with |")
+
+  def testNothingFound(self):
+    commits = self._get_commits()
+
+    self._execute_git(["commit", "--allow-empty", "-m", "A"])
+    self._execute_git(["commit", "--allow-empty", "-m", "B"])
+    self._execute_git(["commit", "--allow-empty", "-m", "C"])
+    self._execute_git(["commit", "--allow-empty", "-m", "D"])
+
+    hash_of_first_commit = commits[0]
+    result = search_related_commits.search_all_related_commits(
+        self.base_dir,
+        hash_of_first_commit,
+        "HEAD",
+        None)
+
+    self.assertEqual(len(result), 0, "Results found where none should be.")
+
+
+if __name__ == "__main__":
+  #import sys;sys.argv = ['', 'Test.testName']
+   unittest.main()