1 # SPDX-License-Identifier: GPL-2.0+
2 # Copyright (c) 2011 The Chromium OS Authors.
5 """Handles parsing a stream of commits/emails from 'git log' or other source"""
17 from patman import command
18 from patman import commit
19 from patman import gitutil
20 from patman.series import Series
# Tags that we detect and remove
RE_REMOVE = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Review URL:'
                       r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
RE_ALLOWED_AFTER_TEST = re.compile('^Signed-off-by:')

# Sign-off line; group 1 is the text after 'Signed-off-by:'
RE_SIGNOFF = re.compile('^Signed-off-by: *(.*)')

# Cover-xxx tag; group 1 is the tag name, group 2 the value
RE_COVER = re.compile('^Cover-([a-z-]*): *(.*)')

# Series-xxx tag; group 1 is the tag name, group 2 the value
RE_SERIES_TAG = re.compile('^Series-([a-z-]*): *(.*)')

# Change-Id will be used to generate the Message-Id and then be stripped
RE_CHANGE_ID = re.compile('^Change-Id: *(.*)')

# Commit-xxx tag; group 1 is the tag name, group 2 the value
RE_COMMIT_TAG = re.compile('^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
RE_TAG = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc|Fixes): (.*)')

# The start of a new commit in the git log
RE_COMMIT = re.compile('^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
RE_SPACE_BEFORE_TAB = re.compile('^[+].* \t')

# Match indented lines for changes
RE_LEADING_WHITESPACE = re.compile(r'^\s')

# Detect a 'diff' line
RE_DIFF = re.compile(r'^>.*diff --git a/(.*) b/(.*)$')

# Detect a context line, like '> @@ -153,8 +153,13 @@ CheckPatch
RE_LINE = re.compile(r'>.*@@ \-(\d+),\d+ \+(\d+),\d+ @@ *(.*)')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
69 """Class for detecting/injecting tags in a patch or series of patches
71 We support processing the output of 'git log' to read out the tags we
72 are interested in. We can also process a patch file in order to remove
73 unwanted tags or inject additional ones. These correspond to the two
def __init__(self, series, is_log=False):
    """Set up the parser state for one stream of commits or one patch file

    Args:
        series (Series): Info about the patch series; tags and commits that
            are found are recorded in this object
        is_log (bool): True if the input is indented like 'git log' output
    """
    self.skip_blank = False          # True to skip a single blank line
    self.found_test = False          # Found a TEST= line
    self.lines_after_test = 0        # Number of lines found after TEST=
    self.linenum = 1                 # Output line number we are up to
    self.in_section = None           # Name of start...END section we are in
    self.notes = []                  # Series notes
    self.section = []                # The current section...END section
    self.series = series             # Info about the patch series
    self.is_log = is_log             # True if indent like git log
    self.in_change = None            # Name of the change list we are in
    self.change_version = 0          # Non-zero if we are in a change list
    self.change_lines = []           # Lines of the current change
    self.blank_count = 0             # Number of blank lines stored up
    self.state = STATE_MSG_HEADER    # What state are we in?
    self.commit = None               # Current commit
    # List of unquoted test blocks, each a list of str lines
    # (this must exist before _finalise_snippet() appends to it)
    self.snippets = []
    self.cur_diff = None             # Last 'diff' line seen (str)
    self.cur_line = None             # Last context (@@) line seen (str)
    self.recent_diff = None          # 'diff' line for current snippet (str)
    self.recent_line = None          # '@@' line for current snippet (str)
    # Most recent quoted ('>') lines; the deque keeps at most 5 of them
    self.recent_quoted = collections.deque([], 5)
    # Unquoted lines collected since the last quoted line
    self.recent_unquoted = queue.Queue()
    self.was_quoted = None           # True if the previous line was quoted
def process_text(text, is_comment=False):
    """Process some text through this class using a default Commit/Series

    NOTE(review): this takes no 'self', so it is presumably meant to be a
    @staticmethod of PatchStream - confirm the decorator is present.

    Args:
        text (str): Text to parse
        is_comment (bool): True if this is a comment rather than a patch.
            If True, PatchStream doesn't expect a patch subject at the
            start, but jumps straight into the body

    Returns:
        PatchStream: object with results
    """
    pstrm = PatchStream(Series())
    pstrm.commit = commit.Commit(None)
    infd = io.StringIO(text)
    outfd = io.StringIO()
    # Only skip the subject/header states when parsing a comment
    if is_comment:
        pstrm.state = STATE_PATCH_HEADER
    pstrm.process_stream(infd, outfd)
    return pstrm
def _add_warn(self, warn):
    """Add a new warning to report to the user about the current commit

    The new warning is added to the current commit if not already present.

    Args:
        warn (str): Warning to report

    Raises:
        ValueError: Warning is generated with no commit associated
    """
    # A warning can only be attached to a commit, so fail if there is none
    if not self.commit:
        raise ValueError('Warning outside commit: %s' % warn)
    if warn not in self.commit.warn:
        self.commit.warn.append(warn)
def _add_to_series(self, line, name, value):
    """Add a new Series-xxx tag.

    When a Series-xxx tag is detected, we come here to record it, if we
    are scanning a 'git log'.

    Args:
        line (str): Source line containing tag (useful for debug/error
            messages)
        name (str): Tag name (part after 'Series-')
        value (str): Tag value (part after 'Series-xxx: ')
    """
    # Only 'notes' starts a multi-line section; any other name here would
    # later be rejected as an unknown section
    if name == 'notes':
        self.in_section = name
        self.skip_blank = False
    if self.is_log:
        warn = self.series.AddTag(self.commit, line, name, value)
        # Only record something if AddTag() actually reported a problem
        if warn:
            self.commit.warn.append(warn)
def _add_to_commit(self, name):
    """Add a new Commit-xxx tag.

    When a Commit-xxx tag is detected, we come here to record it.

    Args:
        name (str): Tag name (part after 'Commit-')
    """
    # 'commit-notes' is the only commit section the parser knows about, so
    # only open a section for that tag
    if name == 'notes':
        self.in_section = 'commit-' + name
        self.skip_blank = False
def _add_commit_rtag(self, rtag_type, who):
    """Add a response tag to the current commit

    Args:
        rtag_type (str): rtag type (e.g. 'Reviewed-by')
        who (str): Person who gave that rtag, e.g.
            'Fred Bloggs <fred@bloggs.org>'
    """
    self.commit.AddRtag(rtag_type, who)
def _close_commit(self):
    """Save the current commit into our commit list, and reset our state"""
    if self.commit and self.is_log:
        self.series.AddCommit(self.commit)
        self.commit = None
    # If 'END' is missing in a 'Cover-letter' section, and that section
    # happens to show up at the very end of the commit message, this is
    # the chance for us to fix it up.
    if self.in_section == 'cover' and self.is_log:
        self.series.cover = self.section
        self.in_section = None
        self.skip_blank = True
        # Start a fresh list so the saved cover letter is not appended to
        self.section = []

    # Forget diff/context information left over from the previous commit
    self.cur_diff = None
    self.recent_diff = None
    self.recent_line = None
def _parse_version(self, value, line):
    """Parse a version from a *-changes tag

    Args:
        value (str): Tag value (part after 'xxx-changes: ')
        line (str): Source line containing tag

    Returns:
        int: The version as an integer

    Raises:
        ValueError: the value cannot be converted
    """
    try:
        return int(value)
    except ValueError as exc:
        # Re-raise with the commit hash and source line so the user can
        # find the bad tag; keep the original error as the cause
        raise ValueError("%s: Cannot decode version info '%s'" %
                         (self.commit.hash, line)) from exc
def _finalise_change(self):
    """Finalise a (multi-line) change and add it to the series or commit

    The collected lines are joined with newlines and passed to the series
    (for 'Series' and 'Cover' change lists) or to the current commit (for
    'Commit' change lists). The collected lines are then cleared.
    """
    # Nothing collected, so there is nothing to record
    if not self.change_lines:
        return
    change = '\n'.join(self.change_lines)

    if self.in_change == 'Series':
        self.series.AddChange(self.change_version, self.commit, change)
    elif self.in_change == 'Cover':
        # Cover-letter changes are not attached to any commit
        self.series.AddChange(self.change_version, None, change)
    elif self.in_change == 'Commit':
        self.commit.AddChange(self.change_version, change)
    self.change_lines = []
233 def _finalise_snippet(self):
234 """Finish off a snippet and add it to the list
236 This is called when we get to the end of a snippet, i.e. the we enter
237 the next block of quoted text:
239 This is a comment from someone.
243 > Now we have some code <----- end of snippet
246 Now a comment about the above code
248 This adds the snippet to our list
251 while self.recent_quoted:
252 quoted_lines.append(self.recent_quoted.popleft())
255 while not self.recent_unquoted.empty():
256 text = self.recent_unquoted.get()
257 if not (text.startswith('On ') and text.endswith('wrote:')):
258 unquoted_lines.append(text)
264 lines.append('> File: %s' % self.recent_diff)
266 out = '> Line: %s / %s' % self.recent_line[:2]
267 if self.recent_line[2]:
268 out += ': %s' % self.recent_line[2]
270 lines += quoted_lines + unquoted_lines
272 self.snippets.append(lines)
274 def process_line(self, line):
275 """Process a single line of a patch file or commit log
277 This process a line and returns a list of lines to output. The list
278 may be empty or may contain multiple output lines.
280 This is where all the complicated logic is located. The class's
281 state is used to move between different states and detect things
284 We can be in one of two modes:
285 self.is_log == True: This is 'git log' mode, where most output is
286 indented by 4 characters and we are scanning for tags
288 self.is_log == False: This is 'patch' mode, where we already have
289 all the tags, and are processing patches to remove junk we
290 don't want, and add things we think are required.
293 line (str): text line to process
296 list: list of output lines, or [] if nothing should be output
299 ValueError: a fatal error occurred while parsing, e.g. an END
300 without a starting tag, or two commits with two change IDs
302 # Initially we have no output. Prepare the input line string
304 line = line.rstrip('\n')
306 commit_match = RE_COMMIT.match(line) if self.is_log else None
312 # Handle state transition and skipping blank lines
313 series_tag_match = RE_SERIES_TAG.match(line)
314 change_id_match = RE_CHANGE_ID.match(line)
315 commit_tag_match = RE_COMMIT_TAG.match(line)
316 cover_match = RE_COVER.match(line)
317 signoff_match = RE_SIGNOFF.match(line)
318 leading_whitespace_match = RE_LEADING_WHITESPACE.match(line)
319 diff_match = RE_DIFF.match(line)
320 line_match = RE_LINE.match(line)
322 if self.state == STATE_PATCH_HEADER:
323 tag_match = RE_TAG.match(line)
324 is_blank = not line.strip()
326 if (self.state == STATE_MSG_HEADER
327 or self.state == STATE_PATCH_SUBJECT):
330 # We don't have a subject in the text stream of patch files
331 # It has its own line with a Subject: tag
332 if not self.is_log and self.state == STATE_PATCH_SUBJECT:
335 self.state = STATE_MSG_HEADER
337 # If a tag is detected, or a new commit starts
338 if series_tag_match or commit_tag_match or change_id_match or \
339 cover_match or signoff_match or self.state == STATE_MSG_HEADER:
340 # but we are already in a section, this means 'END' is missing
341 # for that section, fix it up.
343 self._add_warn("Missing 'END' in section '%s'" % self.in_section)
344 if self.in_section == 'cover':
345 self.series.cover = self.section
346 elif self.in_section == 'notes':
348 self.series.notes += self.section
349 elif self.in_section == 'commit-notes':
351 self.commit.notes += self.section
353 # This should not happen
354 raise ValueError("Unknown section '%s'" % self.in_section)
355 self.in_section = None
356 self.skip_blank = True
358 # but we are already in a change list, that means a blank line
359 # is missing, fix it up.
361 self._add_warn("Missing 'blank line' in section '%s-changes'" %
363 self._finalise_change()
364 self.in_change = None
365 self.change_version = 0
367 # If we are in a section, keep collecting lines until we see END
370 if self.in_section == 'cover':
371 self.series.cover = self.section
372 elif self.in_section == 'notes':
374 self.series.notes += self.section
375 elif self.in_section == 'commit-notes':
377 self.commit.notes += self.section
379 # This should not happen
380 raise ValueError("Unknown section '%s'" % self.in_section)
381 self.in_section = None
382 self.skip_blank = True
385 self.section.append(line)
387 # If we are not in a section, it is an unexpected END
389 raise ValueError("'END' wihout section")
391 # Detect the commit subject
392 elif not is_blank and self.state == STATE_PATCH_SUBJECT:
393 self.commit.subject = line
395 # Detect the tags we want to remove, and skip blank lines
396 elif RE_REMOVE.match(line) and not commit_tag_match:
397 self.skip_blank = True
399 # TEST= should be the last thing in the commit, so remove
400 # everything after it
401 if line.startswith('TEST='):
402 self.found_test = True
403 elif self.skip_blank and is_blank:
404 self.skip_blank = False
406 # Detect Cover-xxx tags
408 name = cover_match.group(1)
409 value = cover_match.group(2)
411 self.in_section = 'cover'
412 self.skip_blank = False
413 elif name == 'letter-cc':
414 self._add_to_series(line, 'cover-cc', value)
415 elif name == 'changes':
416 self.in_change = 'Cover'
417 self.change_version = self._parse_version(value, line)
419 # If we are in a change list, key collected lines until a blank one
422 # Blank line ends this change list
423 self._finalise_change()
424 self.in_change = None
425 self.change_version = 0
427 self._finalise_change()
428 self.in_change = None
429 self.change_version = 0
430 out = self.process_line(line)
432 if not leading_whitespace_match:
433 self._finalise_change()
434 self.change_lines.append(line)
435 self.skip_blank = False
437 # Detect Series-xxx tags
438 elif series_tag_match:
439 name = series_tag_match.group(1)
440 value = series_tag_match.group(2)
441 if name == 'changes':
442 # value is the version number: e.g. 1, or 2
443 self.in_change = 'Series'
444 self.change_version = self._parse_version(value, line)
446 self._add_to_series(line, name, value)
447 self.skip_blank = True
449 # Detect Change-Id tags
450 elif change_id_match:
451 value = change_id_match.group(1)
453 if self.commit.change_id:
455 "%s: Two Change-Ids: '%s' vs. '%s'" % self.commit.hash,
456 self.commit.change_id, value)
457 self.commit.change_id = value
458 self.skip_blank = True
460 # Detect Commit-xxx tags
461 elif commit_tag_match:
462 name = commit_tag_match.group(1)
463 value = commit_tag_match.group(2)
465 self._add_to_commit(name)
466 self.skip_blank = True
467 elif name == 'changes':
468 self.in_change = 'Commit'
469 self.change_version = self._parse_version(value, line)
471 self._add_warn('Line %d: Ignoring Commit-%s' %
472 (self.linenum, name))
474 # Detect the start of a new commit
477 self.commit = commit.Commit(commit_match.group(1))
479 # Detect tags in the commit message
481 rtag_type, who = tag_match.groups()
482 self._add_commit_rtag(rtag_type, who)
483 # Remove Tested-by self, since few will take much notice
484 if (rtag_type == 'Tested-by' and
485 who.find(os.getenv('USER') + '@') != -1):
486 self._add_warn("Ignoring '%s'" % line)
487 elif rtag_type == 'Patch-cc':
488 self.commit.AddCc(who.split(','))
492 # Suppress duplicate signoffs
494 if (self.is_log or not self.commit or
495 self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
498 # Well that means this is an ordinary line
500 # Look for space before tab
501 mat = RE_SPACE_BEFORE_TAB.match(line)
503 self._add_warn('Line %d/%d has space before tab' %
504 (self.linenum, mat.start()))
506 # OK, we have a valid non-blank line
509 self.skip_blank = False
512 self.cur_diff = diff_match.group(1)
514 # If this is quoted, keep recent lines
515 if not diff_match and self.linenum > 1 and line:
516 if line.startswith('>'):
517 if not self.was_quoted:
518 self._finalise_snippet()
519 self.recent_line = None
521 self.recent_quoted.append(line)
522 self.was_quoted = True
523 self.recent_diff = self.cur_diff
525 self.recent_unquoted.put(line)
526 self.was_quoted = False
529 self.recent_line = line_match.groups()
531 if self.state == STATE_DIFFS:
534 # If this is the start of the diffs section, emit our tags and
537 self.state = STATE_DIFFS
539 # Output the tags (signoff first), then change list
541 log = self.series.MakeChangeLog(self.commit)
544 out += self.commit.notes
546 elif self.found_test:
547 if not RE_ALLOWED_AFTER_TEST.match(line):
548 self.lines_after_test += 1
553 """Close out processing of this patch stream"""
554 self._finalise_snippet()
555 self._finalise_change()
557 if self.lines_after_test:
558 self._add_warn('Found %d lines after TEST=' % self.lines_after_test)
def _write_message_id(self, outfd):
    """Write the Message-Id into the output.

    The ID is built from the current time, the series prefix and version
    (where the series has them), the patch number and the Change-Id of the
    current commit. Nothing is written if the commit has no Change-Id.

    Args:
        outfd (io.IOBase): Output stream file object
    """
    if not self.commit.change_id:
        return

    # If the count is -1 we're testing, so use a fixed time
    if self.commit.count == -1:
        time_now = datetime.datetime(1999, 12, 31, 23, 59, 59)
    else:
        time_now = datetime.datetime.now()

    # In theory there is email.utils.make_msgid() which would be nice
    # to use, but it already produces something way too long and thus
    # will produce ugly commit lines if someone throws this into
    # a "Link:" tag in the final commit.  So (sigh) roll our own.

    # Start with the time; presumably we wouldn't send the same series
    # with the same Change-Id at the exact same second.
    parts = [time_now.strftime("%Y%m%d%H%M%S")]

    # These seem like they would be nice to include.
    if 'prefix' in self.series:
        parts.append(self.series['prefix'])
    if 'version' in self.series:
        parts.append("v%s" % self.series['version'])

    parts.append(str(self.commit.count + 1))

    # The Change-Id must be last, right before the @
    parts.append(self.commit.change_id)

    # Join parts together with "." and write it out.
    outfd.write('Message-Id: <%s@changeid>\n' % '.'.join(parts))
601 def process_stream(self, infd, outfd):
602 """Copy a stream from infd to outfd, filtering out unwanting things.
604 This is used to process patch files one at a time.
607 infd (io.IOBase): Input stream file object
608 outfd (io.IOBase): Output stream file object
610 # Extract the filename from each diff, for nice warnings
613 re_fname = re.compile('diff --git a/(.*) b/.*')
615 self._write_message_id(outfd)
618 line = infd.readline()
621 out = self.process_line(line)
623 # Try to detect blank lines at EOF
625 match = re_fname.match(line)
628 fname = match.group(1)
630 self.blank_count += 1
632 if self.blank_count and (line == '-- ' or match):
633 self._add_warn("Found possible blank line(s) at end of file '%s'" %
635 outfd.write('+\n' * self.blank_count)
636 outfd.write(line + '\n')
640 def insert_tags(msg, tags_to_emit):
641 """Add extra tags to a commit message
643 The tags are added after an existing block of tags if found, otherwise at
647 msg (str): Commit message
648 tags_to_emit (list): List of tags to emit, each a str
656 for line in msg.splitlines():
658 signoff_match = RE_SIGNOFF.match(line)
659 tag_match = RE_TAG.match(line)
660 if tag_match or signoff_match:
662 if emit_tags and not tag_match and not signoff_match:
670 return '\n'.join(out)
def get_list(commit_range, git_dir=None, count=None):
    """Get a log of a list of commits

    This returns the output of 'git log' for the selected commits

    Args:
        commit_range (str): Range of commits to count (e.g. 'HEAD..base')
        git_dir (str): Path to git repository (None to use default)
        count (int): Number of commits to list, or None for no limit

    Returns:
        str: String containing the contents of the git log
    """
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    return command.RunPipe([params], capture=True).stdout
def get_metadata_for_list(commit_range, git_dir=None, count=None,
                          series=None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and feeds each line
    through a PatchStream in log mode to pull out the tags.

    Args:
        commit_range (str): Range of commits to count (e.g. 'HEAD..base')
        git_dir (str): Path to git repository (None to use default)
        count (int): Number of commits to list, or None for no limit
        series (Series): Object to add information into. By default a new
            series is started.
        allow_overwrite (bool): Allow tags to overwrite an existing tag

    Returns:
        Series: Object containing information about the commits.
    """
    if not series:
        series = Series()
    series.allow_overwrite = allow_overwrite
    stdout = get_list(commit_range, git_dir, count)
    pst = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        pst.process_line(line)
    # Flush whatever is still pending for the last commit
    pst.finalise()
    return series
def get_metadata(branch, start, count):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits, via
    get_metadata_for_list(), using the default git directory.

    Args:
        branch (str): Branch to use (None for current branch)
        start (int): Commit to start from: 0=branch HEAD, 1=next one, etc.
        count (int): Number of commits to list

    Returns:
        Series: Object containing information about the commits.
    """
    # Build a '<branch>~<start>' revision, falling back to HEAD
    return get_metadata_for_list(
        '%s~%d' % (branch if branch else 'HEAD', start), None, count)
def get_metadata_for_test(text):
    """Process metadata from a file containing a git log. Used for tests

    Args:
        text (str): Text of a git log, processed one line at a time

    Returns:
        Series: Object containing information about the commits.
    """
    series = Series()
    pst = PatchStream(series, is_log=True)
    for line in text.splitlines():
        pst.process_line(line)
    # Flush whatever is still pending for the last commit
    pst.finalise()
    return series
def fix_patch(backup_dir, fname, series, cmt):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir (str): Path to directory to use to backup the file
        fname (str): Filename to patch file to process
        series (Series): Series information about this patch set
        cmt (Commit): Commit object for this patch file

    Returns:
        list: A list of errors, each str, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    # Use context managers so both files are closed even if parsing fails
    with os.fdopen(handle, 'w', encoding='utf-8') as outfd, \
            open(fname, 'r', encoding='utf-8') as infd:
        pst = PatchStream(series)
        pst.commit = cmt
        pst.process_stream(infd, outfd)

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    shutil.move(tmpname, fname)
    return cmt.warn
782 def fix_patches(series, fnames):
783 """Fix up a list of patches identified by filenames
785 The patch files are processed in place, and overwritten.
788 series (Series): The Series object
789 fnames (:type: list of str): List of patch files to process
791 # Current workflow creates patches, so we shouldn't need a backup
792 backup_dir = None #tempfile.mkdtemp('clean-patch')
795 cmt = series.commits[count]
798 result = fix_patch(backup_dir, fname, series, cmt)
800 print('%d warning%s for %s:' %
801 (len(result), 's' if len(result) > 1 else '', fname))
806 print('Cleaned %d patch%s' % (count, 'es' if count > 1 else ''))
808 def insert_cover_letter(fname, series, count):
809 """Inserts a cover letter with the required info into patch 0
812 fname (str): Input / output filename of the cover letter file
813 series (Series): Series object
814 count (int): Number of patches in the series
816 fil = open(fname, 'r')
817 lines = fil.readlines()
820 fil = open(fname, 'w')
822 prefix = series.GetPatchPrefix()
824 if line.startswith('Subject:'):
825 # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
826 zero_repeat = int(math.log10(count)) + 1
827 zero = '0' * zero_repeat
828 line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])
830 # Insert our cover letter
831 elif line.startswith('*** BLURB HERE ***'):
832 # First the blurb test
833 line = '\n'.join(text[1:]) + '\n'
834 if series.get('notes'):
835 line += '\n'.join(series.notes) + '\n'
837 # Now the change list
838 out = series.MakeChangeLog(None)
839 line += '\n' + '\n'.join(out)