1 # SPDX-License-Identifier: GPL-2.0+
2 # Copyright (c) 2011 The Chromium OS Authors.
5 """Handles parsing a stream of commits/emails from 'git log' or other source"""
17 from patman import command
18 from patman import commit
19 from patman import gitutil
20 from patman.series import Series
22 # Tags that we detect and remove
23 RE_REMOVE = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Review URL:'
24 r'|Reviewed-on:|Commit-\w*:')
26 # Lines which are allowed after a TEST= line
27 RE_ALLOWED_AFTER_TEST = re.compile('^Signed-off-by:')
30 RE_SIGNOFF = re.compile('^Signed-off-by: *(.*)')
33 RE_COVER = re.compile('^Cover-([a-z-]*): *(.*)')
36 RE_SERIES_TAG = re.compile('^Series-([a-z-]*): *(.*)')
38 # Change-Id will be used to generate the Message-Id and then be stripped
39 RE_CHANGE_ID = re.compile('^Change-Id: *(.*)')
42 RE_COMMIT_TAG = re.compile('^Commit-([a-z-]*): *(.*)')
44 # Commit tags that we want to collect and keep
45 RE_TAG = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc|Fixes): (.*)')
47 # The start of a new commit in the git log
48 RE_COMMIT = re.compile('^commit ([0-9a-f]*)$')
50 # We detect these since checkpatch doesn't always do it
51 RE_SPACE_BEFORE_TAB = re.compile('^[+].* \t')
53 # Match indented lines for changes
54 RE_LEADING_WHITESPACE = re.compile(r'^\s')
56 # Detect a 'diff' line
57 RE_DIFF = re.compile(r'^>.*diff --git a/(.*) b/(.*)$')
# Detect a context line, like '> @@ -153,8 +153,13 @@ CheckPatch'
60 RE_LINE = re.compile(r'>.*@@ \-(\d+),\d+ \+(\d+),\d+ @@ *(.*)')
62 # Detect line with invalid TAG
63 RE_INV_TAG = re.compile('^Serie-([a-z-]*): *(.*)')
65 # States we can be in - can we use range() and still have comments?
66 STATE_MSG_HEADER = 0 # Still in the message header
67 STATE_PATCH_SUBJECT = 1 # In patch subject (first line of log for a commit)
68 STATE_PATCH_HEADER = 2 # In patch header (after the subject)
69 STATE_DIFFS = 3 # In the diff part (past --- line)
72 """Class for detecting/injecting tags in a patch or series of patches
74 We support processing the output of 'git log' to read out the tags we
75 are interested in. We can also process a patch file in order to remove
76 unwanted tags or inject additional ones. These correspond to the two
79 def __init__(self, series, is_log=False):
80 self.skip_blank = False # True to skip a single blank line
81 self.found_test = False # Found a TEST= line
82 self.lines_after_test = 0 # Number of lines found after TEST=
83 self.linenum = 1 # Output line number we are up to
84 self.in_section = None # Name of start...END section we are in
85 self.notes = [] # Series notes
86 self.section = [] # The current section...END section
87 self.series = series # Info about the patch series
88 self.is_log = is_log # True if indent like git log
89 self.in_change = None # Name of the change list we are in
90 self.change_version = 0 # Non-zero if we are in a change list
91 self.change_lines = [] # Lines of the current change
92 self.blank_count = 0 # Number of blank lines stored up
93 self.state = STATE_MSG_HEADER # What state are we in?
94 self.commit = None # Current commit
95 # List of unquoted test blocks, each a list of str lines
97 self.cur_diff = None # Last 'diff' line seen (str)
98 self.cur_line = None # Last context (@@) line seen (str)
99 self.recent_diff = None # 'diff' line for current snippet (str)
100 self.recent_line = None # '@@' line for current snippet (str)
101 self.recent_quoted = collections.deque([], 5)
102 self.recent_unquoted = queue.Queue()
103 self.was_quoted = None
106 def process_text(text, is_comment=False):
107 """Process some text through this class using a default Commit/Series
110 text (str): Text to parse
111 is_comment (bool): True if this is a comment rather than a patch.
112 If True, PatchStream doesn't expect a patch subject at the
113 start, but jumps straight into the body
116 PatchStream: object with results
118 pstrm = PatchStream(Series())
119 pstrm.commit = commit.Commit(None)
120 infd = io.StringIO(text)
121 outfd = io.StringIO()
123 pstrm.state = STATE_PATCH_HEADER
124 pstrm.process_stream(infd, outfd)
127 def _add_warn(self, warn):
128 """Add a new warning to report to the user about the current commit
130 The new warning is added to the current commit if not already present.
133 warn (str): Warning to report
136 ValueError: Warning is generated with no commit associated
139 print('Warning outside commit: %s' % warn)
140 elif warn not in self.commit.warn:
141 self.commit.warn.append(warn)
143 def _add_to_series(self, line, name, value):
144 """Add a new Series-xxx tag.
146 When a Series-xxx tag is detected, we come here to record it, if we
147 are scanning a 'git log'.
150 line (str): Source line containing tag (useful for debug/error
152 name (str): Tag name (part after 'Series-')
153 value (str): Tag value (part after 'Series-xxx: ')
156 self.in_section = name
157 self.skip_blank = False
159 warn = self.series.AddTag(self.commit, line, name, value)
161 self.commit.warn.append(warn)
163 def _add_to_commit(self, name):
164 """Add a new Commit-xxx tag.
166 When a Commit-xxx tag is detected, we come here to record it.
169 name (str): Tag name (part after 'Commit-')
172 self.in_section = 'commit-' + name
173 self.skip_blank = False
def _add_commit_rtag(self, rtag_type, who):
    """Record a response tag against the commit currently being parsed

    This simply forwards the tag to the current commit's add_rtag() method.

    Args:
        rtag_type (str): Kind of response tag, e.g. 'Reviewed-by'
        who (str): Person the tag is attributed to, e.g.
            'Fred Bloggs <fred@bloggs.org>'
    """
    current = self.commit
    current.add_rtag(rtag_type, who)
185 def _close_commit(self):
186 """Save the current commit into our commit list, and reset our state"""
187 if self.commit and self.is_log:
188 self.series.AddCommit(self.commit)
190 # If 'END' is missing in a 'Cover-letter' section, and that section
191 # happens to show up at the very end of the commit message, this is
192 # the chance for us to fix it up.
193 if self.in_section == 'cover' and self.is_log:
194 self.series.cover = self.section
195 self.in_section = None
196 self.skip_blank = True
200 self.recent_diff = None
201 self.recent_line = None
203 def _parse_version(self, value, line):
204 """Parse a version from a *-changes tag
207 value (str): Tag value (part after 'xxx-changes: '
208 line (str): Source line containing tag
211 int: The version as an integer
214 ValueError: the value cannot be converted
219 raise ValueError("%s: Cannot decode version info '%s'" %
220 (self.commit.hash, line))
def _finalise_change(self):
    """Flush the pending (multi-line) change to the series or the commit

    The collected lines are joined with newlines and handed to the owner
    selected by self.in_change:
        'Series': series.AddChange() with the current commit
        'Cover': series.AddChange() with no commit (None)
        'Commit': commit.add_change()
    Any other value of self.in_change records nothing. In every case where
    there were pending lines, the pending list is reset afterwards. If no
    lines are pending this does nothing at all.
    """
    pending = self.change_lines
    if not pending:
        return

    text = '\n'.join(pending)
    version = self.change_version
    owner = self.in_change

    if owner == 'Commit':
        self.commit.add_change(version, text)
    elif owner == 'Cover':
        # Cover-letter changes are not tied to any particular commit
        self.series.AddChange(version, None, text)
    elif owner == 'Series':
        self.series.AddChange(version, self.commit, text)

    self.change_lines = []
236 def _finalise_snippet(self):
237 """Finish off a snippet and add it to the list
239 This is called when we get to the end of a snippet, i.e. the we enter
240 the next block of quoted text:
242 This is a comment from someone.
246 > Now we have some code <----- end of snippet
249 Now a comment about the above code
251 This adds the snippet to our list
254 while self.recent_quoted:
255 quoted_lines.append(self.recent_quoted.popleft())
258 while not self.recent_unquoted.empty():
259 text = self.recent_unquoted.get()
260 if not (text.startswith('On ') and text.endswith('wrote:')):
261 unquoted_lines.append(text)
267 lines.append('> File: %s' % self.recent_diff)
269 out = '> Line: %s / %s' % self.recent_line[:2]
270 if self.recent_line[2]:
271 out += ': %s' % self.recent_line[2]
273 lines += quoted_lines + unquoted_lines
275 self.snippets.append(lines)
277 def process_line(self, line):
278 """Process a single line of a patch file or commit log
280 This process a line and returns a list of lines to output. The list
281 may be empty or may contain multiple output lines.
283 This is where all the complicated logic is located. The class's
284 state is used to move between different states and detect things
287 We can be in one of two modes:
288 self.is_log == True: This is 'git log' mode, where most output is
289 indented by 4 characters and we are scanning for tags
291 self.is_log == False: This is 'patch' mode, where we already have
292 all the tags, and are processing patches to remove junk we
293 don't want, and add things we think are required.
296 line (str): text line to process
299 list: list of output lines, or [] if nothing should be output
302 ValueError: a fatal error occurred while parsing, e.g. an END
303 without a starting tag, or two commits with two change IDs
305 # Initially we have no output. Prepare the input line string
307 line = line.rstrip('\n')
309 commit_match = RE_COMMIT.match(line) if self.is_log else None
315 # Handle state transition and skipping blank lines
316 series_tag_match = RE_SERIES_TAG.match(line)
317 change_id_match = RE_CHANGE_ID.match(line)
318 commit_tag_match = RE_COMMIT_TAG.match(line)
319 cover_match = RE_COVER.match(line)
320 signoff_match = RE_SIGNOFF.match(line)
321 leading_whitespace_match = RE_LEADING_WHITESPACE.match(line)
322 diff_match = RE_DIFF.match(line)
323 line_match = RE_LINE.match(line)
324 invalid_match = RE_INV_TAG.match(line)
326 if self.state == STATE_PATCH_HEADER:
327 tag_match = RE_TAG.match(line)
328 is_blank = not line.strip()
330 if (self.state == STATE_MSG_HEADER
331 or self.state == STATE_PATCH_SUBJECT):
334 # We don't have a subject in the text stream of patch files
335 # It has its own line with a Subject: tag
336 if not self.is_log and self.state == STATE_PATCH_SUBJECT:
339 self.state = STATE_MSG_HEADER
341 # If a tag is detected, or a new commit starts
342 if series_tag_match or commit_tag_match or change_id_match or \
343 cover_match or signoff_match or self.state == STATE_MSG_HEADER:
344 # but we are already in a section, this means 'END' is missing
345 # for that section, fix it up.
347 self._add_warn("Missing 'END' in section '%s'" % self.in_section)
348 if self.in_section == 'cover':
349 self.series.cover = self.section
350 elif self.in_section == 'notes':
352 self.series.notes += self.section
353 elif self.in_section == 'commit-notes':
355 self.commit.notes += self.section
357 # This should not happen
358 raise ValueError("Unknown section '%s'" % self.in_section)
359 self.in_section = None
360 self.skip_blank = True
362 # but we are already in a change list, that means a blank line
363 # is missing, fix it up.
365 self._add_warn("Missing 'blank line' in section '%s-changes'" %
367 self._finalise_change()
368 self.in_change = None
369 self.change_version = 0
371 # If we are in a section, keep collecting lines until we see END
374 if self.in_section == 'cover':
375 self.series.cover = self.section
376 elif self.in_section == 'notes':
378 self.series.notes += self.section
379 elif self.in_section == 'commit-notes':
381 self.commit.notes += self.section
383 # This should not happen
384 raise ValueError("Unknown section '%s'" % self.in_section)
385 self.in_section = None
386 self.skip_blank = True
389 self.section.append(line)
391 # If we are not in a section, it is an unexpected END
393 raise ValueError("'END' wihout section")
395 # Detect the commit subject
396 elif not is_blank and self.state == STATE_PATCH_SUBJECT:
397 self.commit.subject = line
399 # Detect the tags we want to remove, and skip blank lines
400 elif RE_REMOVE.match(line) and not commit_tag_match:
401 self.skip_blank = True
403 # TEST= should be the last thing in the commit, so remove
404 # everything after it
405 if line.startswith('TEST='):
406 self.found_test = True
407 elif self.skip_blank and is_blank:
408 self.skip_blank = False
410 # Detect Cover-xxx tags
412 name = cover_match.group(1)
413 value = cover_match.group(2)
415 self.in_section = 'cover'
416 self.skip_blank = False
417 elif name == 'letter-cc':
418 self._add_to_series(line, 'cover-cc', value)
419 elif name == 'changes':
420 self.in_change = 'Cover'
421 self.change_version = self._parse_version(value, line)
423 # If we are in a change list, key collected lines until a blank one
426 # Blank line ends this change list
427 self._finalise_change()
428 self.in_change = None
429 self.change_version = 0
431 self._finalise_change()
432 self.in_change = None
433 self.change_version = 0
434 out = self.process_line(line)
436 if not leading_whitespace_match:
437 self._finalise_change()
438 self.change_lines.append(line)
439 self.skip_blank = False
441 # Detect Series-xxx tags
442 elif series_tag_match:
443 name = series_tag_match.group(1)
444 value = series_tag_match.group(2)
445 if name == 'changes':
446 # value is the version number: e.g. 1, or 2
447 self.in_change = 'Series'
448 self.change_version = self._parse_version(value, line)
450 self._add_to_series(line, name, value)
451 self.skip_blank = True
453 # Detect Change-Id tags
454 elif change_id_match:
455 value = change_id_match.group(1)
457 if self.commit.change_id:
459 "%s: Two Change-Ids: '%s' vs. '%s'" %
460 (self.commit.hash, self.commit.change_id, value))
461 self.commit.change_id = value
462 self.skip_blank = True
464 # Detect Commit-xxx tags
465 elif commit_tag_match:
466 name = commit_tag_match.group(1)
467 value = commit_tag_match.group(2)
469 self._add_to_commit(name)
470 self.skip_blank = True
471 elif name == 'changes':
472 self.in_change = 'Commit'
473 self.change_version = self._parse_version(value, line)
475 self._add_warn('Line %d: Ignoring Commit-%s' %
476 (self.linenum, name))
478 # Detect invalid tags
480 raise ValueError("Line %d: Invalid tag = '%s'" %
481 (self.linenum, line))
483 # Detect the start of a new commit
486 self.commit = commit.Commit(commit_match.group(1))
488 # Detect tags in the commit message
490 rtag_type, who = tag_match.groups()
491 self._add_commit_rtag(rtag_type, who)
492 # Remove Tested-by self, since few will take much notice
493 if (rtag_type == 'Tested-by' and
494 who.find(os.getenv('USER') + '@') != -1):
495 self._add_warn("Ignoring '%s'" % line)
496 elif rtag_type == 'Patch-cc':
497 self.commit.add_cc(who.split(','))
501 # Suppress duplicate signoffs
503 if (self.is_log or not self.commit or
504 self.commit.check_duplicate_signoff(signoff_match.group(1))):
507 # Well that means this is an ordinary line
509 # Look for space before tab
510 mat = RE_SPACE_BEFORE_TAB.match(line)
512 self._add_warn('Line %d/%d has space before tab' %
513 (self.linenum, mat.start()))
515 # OK, we have a valid non-blank line
518 self.skip_blank = False
521 self.cur_diff = diff_match.group(1)
523 # If this is quoted, keep recent lines
524 if not diff_match and self.linenum > 1 and line:
525 if line.startswith('>'):
526 if not self.was_quoted:
527 self._finalise_snippet()
528 self.recent_line = None
530 self.recent_quoted.append(line)
531 self.was_quoted = True
532 self.recent_diff = self.cur_diff
534 self.recent_unquoted.put(line)
535 self.was_quoted = False
538 self.recent_line = line_match.groups()
540 if self.state == STATE_DIFFS:
543 # If this is the start of the diffs section, emit our tags and
546 self.state = STATE_DIFFS
548 # Output the tags (signoff first), then change list
550 log = self.series.MakeChangeLog(self.commit)
553 out += self.commit.notes
555 elif self.found_test:
556 if not RE_ALLOWED_AFTER_TEST.match(line):
557 self.lines_after_test += 1
562 """Close out processing of this patch stream"""
563 self._finalise_snippet()
564 self._finalise_change()
566 if self.lines_after_test:
567 self._add_warn('Found %d lines after TEST=' % self.lines_after_test)
def _write_message_id(self, outfd):
    """Write the Message-Id into the output.

    The ID is built from dot-separated parts, in this order: a timestamp,
    the series 'prefix', 'postfix' and 'version' (each only if present in
    the series), the commit count plus one, and finally the Change-Id of
    the commit. It is written as a single 'Message-Id: <...@changeid>' line.

    Nothing is written if the current commit has no Change-Id.

    Args:
        outfd (io.IOBase): Output stream file object
    """
    if not self.commit.change_id:
        return

    # If the count is -1 we're testing, so use a fixed time
    if self.commit.count == -1:
        time_now = datetime.datetime(1999, 12, 31, 23, 59, 59)
    else:
        time_now = datetime.datetime.now()

    # In theory there is email.utils.make_msgid() which would be nice
    # to use, but it already produces something way too long and thus
    # will produce ugly commit lines if someone throws this into
    # a "Link:" tag in the final commit. So (sigh) roll our own.

    # Start with the time; presumably we wouldn't send the same series
    # with the same Change-Id at the exact same second.
    parts = [time_now.strftime("%Y%m%d%H%M%S")]

    # These seem like they would be nice to include.
    if 'prefix' in self.series:
        parts.append(self.series['prefix'])
    if 'postfix' in self.series:
        # Read from self.series; a misspelt attribute name here used to
        # raise AttributeError for any series that had a postfix
        parts.append(self.series['postfix'])
    if 'version' in self.series:
        parts.append("v%s" % self.series['version'])

    parts.append(str(self.commit.count + 1))

    # The Change-Id must be last, right before the @
    parts.append(self.commit.change_id)

    # Join parts together with "." and write it out.
    outfd.write('Message-Id: <%s@changeid>\n' % '.'.join(parts))
612 def process_stream(self, infd, outfd):
613 """Copy a stream from infd to outfd, filtering out unwanting things.
615 This is used to process patch files one at a time.
618 infd (io.IOBase): Input stream file object
619 outfd (io.IOBase): Output stream file object
621 # Extract the filename from each diff, for nice warnings
624 re_fname = re.compile('diff --git a/(.*) b/.*')
626 self._write_message_id(outfd)
629 line = infd.readline()
632 out = self.process_line(line)
634 # Try to detect blank lines at EOF
636 match = re_fname.match(line)
639 fname = match.group(1)
641 self.blank_count += 1
643 if self.blank_count and (line == '-- ' or match):
644 self._add_warn("Found possible blank line(s) at end of file '%s'" %
646 outfd.write('+\n' * self.blank_count)
647 outfd.write(line + '\n')
651 def insert_tags(msg, tags_to_emit):
652 """Add extra tags to a commit message
654 The tags are added after an existing block of tags if found, otherwise at
658 msg (str): Commit message
659 tags_to_emit (list): List of tags to emit, each a str
668 for line in msg.splitlines():
670 signoff_match = RE_SIGNOFF.match(line)
671 tag_match = RE_TAG.match(line)
672 if tag_match or signoff_match:
674 if emit_tags and not tag_match and not signoff_match:
678 emit_blank = not (signoff_match or tag_match)
686 return '\n'.join(out)
688 def get_list(commit_range, git_dir=None, count=None):
689 """Get a log of a list of comments
691 This returns the output of 'git log' for the selected commits
694 commit_range (str): Range of commits to count (e.g. 'HEAD..base')
695 git_dir (str): Path to git repositiory (None to use default)
696 count (int): Number of commits to list, or None for no limit
699 str: String containing the contents of the git log
701 params = gitutil.LogCmd(commit_range, reverse=True, count=count,
703 return command.run_pipe([params], capture=True).stdout
705 def get_metadata_for_list(commit_range, git_dir=None, count=None,
706 series=None, allow_overwrite=False):
707 """Reads out patch series metadata from the commits
709 This does a 'git log' on the relevant commits and pulls out the tags we
713 commit_range (str): Range of commits to count (e.g. 'HEAD..base')
714 git_dir (str): Path to git repositiory (None to use default)
715 count (int): Number of commits to list, or None for no limit
716 series (Series): Object to add information into. By default a new series
718 allow_overwrite (bool): Allow tags to overwrite an existing tag
721 Series: Object containing information about the commits.
725 series.allow_overwrite = allow_overwrite
726 stdout = get_list(commit_range, git_dir, count)
727 pst = PatchStream(series, is_log=True)
728 for line in stdout.splitlines():
729 pst.process_line(line)
def get_metadata(branch, start, count):
    """Read patch series metadata from commits on a branch

    Builds a commit reference of the form '<branch>~<start>' (using 'HEAD'
    when no branch is given) and passes it to get_metadata_for_list(), with
    no git directory specified.

    Args:
        branch (str): Branch to use (None for current branch)
        start (int): Commit to start from: 0=branch HEAD, 1=next one, etc.
        count (int): Number of commits to list

    Returns:
        Series: Object containing information about the commits.
    """
    base = branch if branch else 'HEAD'
    commit_range = '%s~%d' % (base, start)
    return get_metadata_for_list(commit_range, None, count)
750 def get_metadata_for_test(text):
751 """Process metadata from a file containing a git log. Used for tests
757 Series: Object containing information about the commits.
760 pst = PatchStream(series, is_log=True)
761 for line in text.splitlines():
762 pst.process_line(line)
766 def fix_patch(backup_dir, fname, series, cmt):
767 """Fix up a patch file, by adding/removing as required.
769 We remove our tags from the patch file, insert changes lists, etc.
770 The patch file is processed in place, and overwritten.
772 A backup file is put into backup_dir (if not None).
775 backup_dir (str): Path to directory to use to backup the file
776 fname (str): Filename to patch file to process
777 series (Series): Series information about this patch set
778 cmt (Commit): Commit object for this patch file
781 list: A list of errors, each str, or [] if all ok.
783 handle, tmpname = tempfile.mkstemp()
784 outfd = os.fdopen(handle, 'w', encoding='utf-8')
785 infd = open(fname, 'r', encoding='utf-8')
786 pst = PatchStream(series)
788 pst.process_stream(infd, outfd)
792 # Create a backup file if required
794 shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
795 shutil.move(tmpname, fname)
798 def fix_patches(series, fnames):
799 """Fix up a list of patches identified by filenames
801 The patch files are processed in place, and overwritten.
804 series (Series): The Series object
805 fnames (:type: list of str): List of patch files to process
807 # Current workflow creates patches, so we shouldn't need a backup
808 backup_dir = None #tempfile.mkdtemp('clean-patch')
811 cmt = series.commits[count]
814 result = fix_patch(backup_dir, fname, series, cmt)
816 print('%d warning%s for %s:' %
817 (len(result), 's' if len(result) > 1 else '', fname))
822 print('Cleaned %d patch%s' % (count, 'es' if count > 1 else ''))
824 def insert_cover_letter(fname, series, count):
825 """Inserts a cover letter with the required info into patch 0
828 fname (str): Input / output filename of the cover letter file
829 series (Series): Series object
830 count (int): Number of patches in the series
832 fil = open(fname, 'r')
833 lines = fil.readlines()
836 fil = open(fname, 'w')
838 prefix = series.GetPatchPrefix()
840 if line.startswith('Subject:'):
841 # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
842 zero_repeat = int(math.log10(count)) + 1
843 zero = '0' * zero_repeat
844 line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])
846 # Insert our cover letter
847 elif line.startswith('*** BLURB HERE ***'):
848 # First the blurb test
849 line = '\n'.join(text[1:]) + '\n'
850 if series.get('notes'):
851 line += '\n'.join(series.notes) + '\n'
853 # Now the change list
854 out = series.MakeChangeLog(None)
855 line += '\n' + '\n'.join(out)