1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package printer implements printing of AST nodes.
20 const debug = false // enable for debugging
26 ignore = whiteSpace(0)
27 blank = whiteSpace(' ')
28 vtab = whiteSpace('\v')
29 newline = whiteSpace('\n')
30 formfeed = whiteSpace('\f')
31 indent = whiteSpace('>')
32 unindent = whiteSpace('<')
36 esc = []byte{tabwriter.Escape}
38 htabs = []byte("\t\t\t\t\t\t\t\t")
39 newlines = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines
40 formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines
44 var noPos token.Position // use noPos when a position is needed but not known
45 var infinity = 1 << 30
47 // Use ignoreMultiLine if the multiLine information is not important.
48 var ignoreMultiLine = new(bool)
50 // A pmode value represents the current printer mode.
54 inLiteral pmode = 1 << iota
59 // Configuration (does not change after initialization)
66 written int // number of bytes written
67 indent int // current indentation
68 mode pmode // current printer mode
69 lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace)
72 wsbuf []whiteSpace // delayed white space
73 litbuf bytes.Buffer // for creation of escaped literals and comments
75 // The (possibly estimated) position in the generated output;
76 // in AST space (i.e., pos is set whenever a token position is
77 // known accurately, and updated depending on what has been
81 // The value of pos immediately after the last item has been
82 // written using writeItem.
85 // The list of all source comments, in order of appearance.
86 comments []*ast.CommentGroup // may be nil
87 cindex int // current comment index
88 useNodeComments bool // if not set, ignore lead and line comments of nodes
90 // Cache of already computed node sizes.
91 nodeSizes map[ast.Node]int
94 func (p *printer) init(output io.Writer, cfg *Config, fset *token.FileSet, nodeSizes map[ast.Node]int) {
98 p.errors = make(chan os.Error)
99 p.wsbuf = make([]whiteSpace, 0, 16) // whitespace sequences are short
100 p.nodeSizes = nodeSizes
103 func (p *printer) internalError(msg ...interface{}) {
105 fmt.Print(p.pos.String() + ": ")
111 // escape escapes string s by bracketing it with tabwriter.Escape.
112 // Escaped strings pass through tabwriter unchanged. (Note that
113 // valid Go programs cannot contain tabwriter.Escape bytes since
114 // they do not appear in legal UTF-8 sequences).
116 func (p *printer) escape(s string) string {
118 p.litbuf.WriteByte(tabwriter.Escape)
119 p.litbuf.WriteString(s)
120 p.litbuf.WriteByte(tabwriter.Escape)
121 return p.litbuf.String()
124 // nlines returns the adjusted number of linebreaks given the desired number
125 // of breaks n such that min <= result <= max.
127 func (p *printer) nlines(n, min int) int {
128 const max = 2 // max. number of newlines
138 // write0 writes raw (uninterpreted) data to p.output and handles errors.
139 // write0 does not indent after newlines, and does not HTML-escape or update p.pos.
141 func (p *printer) write0(data []byte) {
143 n, err := p.output.Write(data)
152 // write interprets data and writes it to p.output. It inserts indentation
153 // after a line break unless in a tabwriter escape sequence.
154 // It updates p.pos as a side-effect.
156 func (p *printer) write(data []byte) {
158 for i, b := range data {
161 // write segment ending in b
162 p.write0(data[i0 : i+1])
165 p.pos.Offset += i + 1 - i0
169 if p.mode&inLiteral == 0 {
171 // use "hard" htabs - indentation columns
172 // must not be discarded by the tabwriter
174 for ; j > len(htabs); j -= len(htabs) {
180 p.pos.Offset += p.indent
181 p.pos.Column += p.indent
184 // next segment start
187 case tabwriter.Escape:
190 // ignore escape chars introduced by printer - they are
191 // invisible and must not affect p.pos (was issue #1089)
197 // write remaining segment
206 func (p *printer) writeNewlines(n int, useFF bool) {
210 p.write(formfeeds[0:n])
212 p.write(newlines[0:n])
217 // writeItem writes data at position pos. data is the text corresponding to
218 // a single lexical token, but may also be comment text. pos is the actual
219 // (or at least very accurately estimated) position of the data in the original
220 // source text. writeItem updates p.last to the position immediately following
223 func (p *printer) writeItem(pos token.Position, data string) {
225 // continue with previous position if we don't have a valid pos
226 if p.last.IsValid() && p.last.Filename != pos.Filename {
227 // the file has changed - reset state
228 // (used when printing merged ASTs of different files
229 // e.g., the result of ast.MergePackageFiles)
232 p.wsbuf = p.wsbuf[0:0]
237 // do not update p.pos - use write0
238 _, filename := filepath.Split(pos.Filename)
239 p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column)))
241 p.write([]byte(data))
245 // writeCommentPrefix writes the whitespace before a comment.
246 // If there is any pending whitespace, it consumes as much of
247 // it as is likely to help position the comment nicely.
248 // pos is the comment position, next the position of the item
249 // after all pending comments, prev is the previous comment in
250 // a group of comments (or nil), and isKeyword indicates if the
251 // next item is a keyword.
253 func (p *printer) writeCommentPrefix(pos, next token.Position, prev *ast.Comment, isKeyword bool) {
255 // the comment is the first item to be printed - don't write any whitespace
259 if pos.IsValid() && pos.Filename != p.last.Filename {
260 // comment in a different file - separate with newlines (writeNewlines will limit the number)
261 p.writeNewlines(10, true)
265 if pos.Line == p.last.Line && (prev == nil || prev.Text[1] != '/') {
266 // comment on the same line as last item:
267 // separate with at least one separator
270 // first comment of a comment group
272 for i, ch := range p.wsbuf {
275 // ignore any blanks before a comment
279 // respect existing tabs - important
280 // for proper formatting of commented structs
284 // apply pending indentation
292 // make sure there is at least one separator
294 if pos.Line == next.Line {
295 // next item is on the same line as the comment
296 // (which must be a /*-style comment): separate
297 // with a blank instead of a tab
305 // comment on a different line:
306 // separate with at least one line break
308 // first comment of a comment group
310 for i, ch := range p.wsbuf {
313 // ignore any horizontal whitespace before line breaks
317 // apply pending indentation
320 // if the next token is a keyword, apply the outdent
321 // if it appears that the comment is aligned with the
322 // keyword; otherwise assume the outdent is part of a
323 // closing block and stop (this scenario appears with
324 // comments before a case label where the comments
325 // apply to the next case instead of the current one)
326 if isKeyword && pos.Column == next.Column {
329 case newline, formfeed:
330 // TODO(gri): may want to keep formfeed info in some cases
338 // use formfeeds to break columns before a comment;
339 // this is analogous to using formfeeds to separate
340 // individual lines of /*-style comments - but make
341 // sure there is at least one line break if the previous
342 // comment was a line comment
343 n := pos.Line - p.last.Line // if !pos.IsValid(), pos.Line == 0, and n will be 0
344 if n <= 0 && prev != nil && prev.Text[1] == '/' {
347 p.writeNewlines(n, true)
351 // TODO(gri): It should be possible to convert the code below from using
352 // []byte to string and in the process eliminate some conversions.
354 // Split comment text into lines
355 func split(text []byte) [][]byte {
356 // count lines (comment text never ends in a newline)
358 for _, c := range text {
365 lines := make([][]byte, n)
368 for j, c := range text {
370 lines[n] = text[i:j] // exclude newline
371 i = j + 1 // discard newline
380 func isBlank(s []byte) bool {
381 for _, b := range s {
389 func commonPrefix(a, b []byte) []byte {
391 for i < len(a) && i < len(b) && a[i] == b[i] && (a[i] <= ' ' || a[i] == '*') {
397 func stripCommonPrefix(lines [][]byte) {
399 return // at most one line - nothing to do
403 // The heuristic in this function tries to handle a few
404 // common patterns of /*-style comments: Comments where
405 // the opening /* and closing */ are aligned and the
406 // rest of the comment text is aligned and indented with
407 // blanks or tabs, cases with a vertical "line of stars"
408 // on the left, and cases where the closing */ is on the
409 // same line as the last comment text.
411 // Compute maximum common white prefix of all but the first,
412 // last, and blank lines, and replace blank lines with empty
413 // lines (the first line starts with /* and has no prefix).
414 // In case of two-line comments, consider the last line for
415 // the prefix computation since otherwise the prefix would
418 // Note that the first and last line are never empty (they
419 // contain the opening /* and closing */ respectively) and
420 // thus they can be ignored by the blank line check.
423 for i, line := range lines[1 : len(lines)-1] {
426 lines[1+i] = nil // range starts at line 1
428 prefix = commonPrefix(line, line)
430 prefix = commonPrefix(prefix, line)
433 } else { // len(lines) == 2
435 prefix = commonPrefix(line, line)
439 * Check for vertical "line of stars" and correct prefix accordingly.
442 if i := bytes.Index(prefix, []byte{'*'}); i >= 0 {
443 // Line of stars present.
444 if i > 0 && prefix[i-1] == ' ' {
445 i-- // remove trailing blank from prefix so stars remain aligned
450 // No line of stars present.
451 // Determine the white space on the first line after the /*
452 // and before the beginning of the comment text, assume two
453 // blanks instead of the /* unless the first character after
454 // the /* is a tab. If the first comment line is empty but
455 // for the opening /*, assume up to 3 blanks or a tab. This
456 // whitespace may be found as suffix in the common prefix.
458 if isBlank(first[2:]) {
459 // no comment text on the first line:
460 // reduce prefix by up to 3 blanks or a tab
461 // if present - this keeps comment text indented
462 // relative to the /* and */'s if it was indented
463 // in the first place
465 for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ {
468 if i == len(prefix) && i > 0 && prefix[i-1] == '\t' {
473 // comment text on the first line
474 suffix := make([]byte, len(first))
475 n := 2 // start after opening /*
476 for n < len(first) && first[n] <= ' ' {
480 if n > 2 && suffix[2] == '\t' {
481 // assume the '\t' compensates for the /*
484 // otherwise assume two blanks
485 suffix[0], suffix[1] = ' ', ' '
488 // Shorten the computed common prefix by the length of
489 // suffix, if it is found as suffix of the prefix.
490 if bytes.HasSuffix(prefix, suffix) {
491 prefix = prefix[0 : len(prefix)-len(suffix)]
496 // Handle last line: If it only contains a closing */, align it
497 // with the opening /*, otherwise align the text with the other
499 last := lines[len(lines)-1]
500 closing := []byte("*/")
501 i := bytes.Index(last, closing)
502 if isBlank(last[0:i]) {
503 // last line only contains closing */
506 // insert an aligning blank
509 lines[len(lines)-1] = bytes.Join([][]byte{prefix, closing}, sep)
511 // last line contains more comment text - assume
512 // it is aligned like the other lines
513 prefix = commonPrefix(prefix, last)
516 // Remove the common prefix from all but the first and empty lines.
517 for i, line := range lines[1:] {
519 lines[1+i] = line[len(prefix):] // range starts at line 1
524 func (p *printer) writeComment(comment *ast.Comment) {
527 // shortcut common case of //-style comments
529 p.writeItem(p.fset.Position(comment.Pos()), p.escape(text))
533 // for /*-style comments, print line by line and let the
534 // write function take care of the proper indentation
535 lines := split([]byte(text))
536 stripCommonPrefix(lines)
538 // write comment lines, separated by formfeed,
539 // without a line break after the last line
540 linebreak := formfeeds[0:1]
541 pos := p.fset.Position(comment.Pos())
542 for i, line := range lines {
548 p.writeItem(pos, p.escape(string(line)))
553 // writeCommentSuffix writes a line break after a comment if indicated
554 // and processes any leftover indentation information. If a line break
555 // is needed, the kind of break (newline vs formfeed) depends on the
556 // pending whitespace. writeCommentSuffix returns true if a pending
557 // formfeed was dropped from the whitespace buffer.
559 func (p *printer) writeCommentSuffix(needsLinebreak bool) (droppedFF bool) {
560 for i, ch := range p.wsbuf {
563 // ignore trailing whitespace
565 case indent, unindent:
566 // don't lose indentation information
567 case newline, formfeed:
568 // if we need a line break, keep exactly one
569 // but remember if we dropped any formfeeds
571 needsLinebreak = false
580 p.writeWhitespace(len(p.wsbuf))
582 // make sure we have a line break
584 p.write([]byte{'\n'})
590 // intersperseComments consumes all comments that appear before the next token
591 // tok and prints them together with the buffered whitespace (i.e., the whitespace
592 // that needs to be written before the next token). A heuristic is used to mix
593 // the comments and whitespace. intersperseComments returns true if a pending
594 // formfeed was dropped from the whitespace buffer.
596 func (p *printer) intersperseComments(next token.Position, tok token.Token) (droppedFF bool) {
597 var last *ast.Comment
598 for ; p.commentBefore(next); p.cindex++ {
599 for _, c := range p.comments[p.cindex].List {
600 p.writeCommentPrefix(p.fset.Position(c.Pos()), next, last, tok.IsKeyword())
607 if last.Text[1] == '*' && p.fset.Position(last.Pos()).Line == next.Line {
608 // the last comment is a /*-style comment and the next item
609 // follows on the same line: separate with an extra blank
612 // ensure that there is a line break after a //-style comment,
613 // before a closing '}' unless explicitly disabled, or at eof
615 last.Text[1] == '/' ||
616 tok == token.RBRACE && p.mode&noExtraLinebreak == 0 ||
618 return p.writeCommentSuffix(needsLinebreak)
621 // no comment was written - we should never reach here since
622 // intersperseComments should not be called in that case
623 p.internalError("intersperseComments called without pending comments")
627 // writeWhitespace writes the first n whitespace entries.
628 func (p *printer) writeWhitespace(n int) {
631 for i := 0; i < n; i++ {
632 switch ch := p.wsbuf[i]; ch {
640 p.internalError("negative indentation:", p.indent)
643 case newline, formfeed:
644 // A line break immediately followed by a "correcting"
645 // unindent is swapped with the unindent - this permits
646 // proper label positioning. If a comment is between
647 // the line break and the label, the unindent is not
648 // part of the comment whitespace prefix and the comment
649 // will be positioned correctly indented.
650 if i+1 < n && p.wsbuf[i+1] == unindent {
651 // Use a formfeed to terminate the current section.
652 // Otherwise, a long label name on the next line leading
653 // to a wide column may increase the indentation column
654 // of lines before the label; effectively leading to wrong
656 p.wsbuf[i], p.wsbuf[i+1] = unindent, formfeed
667 // shift remaining entries down
669 for ; n < len(p.wsbuf); n++ {
670 p.wsbuf[i] = p.wsbuf[n]
673 p.wsbuf = p.wsbuf[0:i]
676 // ----------------------------------------------------------------------------
677 // Printing interface
680 func mayCombine(prev token.Token, next byte) (b bool) {
683 b = next == '.' // 1.
685 b = next == '+' // ++
687 b = next == '-' // --
689 b = next == '*' // /*
691 b = next == '-' || next == '<' // <- or <<
693 b = next == '&' || next == '^' // && or &^
698 // print prints a list of "items" (roughly corresponding to syntactic
699 // tokens, but also including whitespace and formatting information).
700 // It is the only print function that should be called directly from
701 // any of the AST printing functions in nodes.go.
703 // Whitespace is accumulated until a non-whitespace token appears. Any
704 // comments that need to appear before that token are printed first,
705 // taking into account the amount and structure of any pending white-
706 // space for best comment placement. Then, any leftover whitespace is
707 // printed, followed by the actual token.
709 func (p *printer) print(args ...interface{}) {
710 for _, f := range args {
711 next := p.pos // estimated position of next item
715 switch x := f.(type) {
717 // toggle printer mode
721 // don't add ignore's to the buffer; they
722 // may screw up "correcting" unindents (see
727 if i == cap(p.wsbuf) {
728 // Whitespace sequences are very short so this should
729 // never happen. Handle gracefully (but possibly with
730 // bad comment placement) if it does happen.
734 p.wsbuf = p.wsbuf[0 : i+1]
740 data = p.escape(x.Value)
744 if mayCombine(p.lastTok, s[0]) {
745 // the previous and the current token must be
746 // separated by a blank otherwise they combine
747 // into a different incorrect token sequence
748 // (except for token.INT followed by a '.' this
749 // should never happen because it is taken care
750 // of via binary expression formatting)
751 if len(p.wsbuf) != 0 {
752 p.internalError("whitespace buffer not empty")
754 p.wsbuf = p.wsbuf[0:1]
761 next = p.fset.Position(x) // accurate position of next item
765 fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f)
766 panic("go/printer type")
772 droppedFF := p.flush(next, tok)
774 // intersperse extra newlines if present in the source
775 // (don't do this in flush as it will cause extra newlines
776 // at the end of a file) - use formfeeds if we dropped one
778 p.writeNewlines(next.Line-p.pos.Line, droppedFF)
780 p.writeItem(next, data)
785 // commentBefore returns true iff the current comment occurs
786 // before the next position in the source code.
788 func (p *printer) commentBefore(next token.Position) bool {
789 return p.cindex < len(p.comments) && p.fset.Position(p.comments[p.cindex].List[0].Pos()).Offset < next.Offset
792 // flush prints any pending comments and whitespace occurring
793 // textually before the position of the next token tok. flush
794 // returns true if a pending formfeed character was dropped
795 // from the whitespace buffer as a result of interspersing
798 func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) {
799 if p.commentBefore(next) {
800 // if there are comments before the next item, intersperse them
801 droppedFF = p.intersperseComments(next, tok)
803 // otherwise, write any leftover whitespace
804 p.writeWhitespace(len(p.wsbuf))
809 // ----------------------------------------------------------------------------
812 // A trimmer is an io.Writer filter for stripping tabwriter.Escape
813 // characters, trailing blanks and tabs, and for converting formfeed
814 // and vtab characters into newlines and htabs (in case no tabwriter
815 // is used). Text bracketed by tabwriter.Escape characters is passed
816 // through unchanged.
818 type trimmer struct {
824 // trimmer is implemented as a state machine.
825 // It can be in one of the following states:
827 inSpace = iota // inside space
828 inEscape // inside text bracketed by tabwriter.Escapes
829 inText // inside text
832 // Design note: It is tempting to eliminate extra blanks occurring in
833 // whitespace in this function as it could simplify some
834 // of the blanks logic in the node printing functions.
835 // However, this would mess up any formatting done by
838 func (p *trimmer) Write(data []byte) (n int, err os.Error) {
840 // p.state == inSpace:
841 // p.space is unwritten
842 // p.state == inEscape, inText:
843 // data[m:n] is unwritten
846 for n, b = range data {
848 b = '\t' // convert to htab
854 p.space.WriteByte(b) // WriteByte returns no errors
856 p.space.Reset() // discard trailing space
857 _, err = p.output.Write(newlines[0:1]) // write newline
858 case tabwriter.Escape:
859 _, err = p.output.Write(p.space.Bytes())
861 m = n + 1 // +1: skip tabwriter.Escape
863 _, err = p.output.Write(p.space.Bytes())
868 if b == tabwriter.Escape {
869 _, err = p.output.Write(data[m:n])
876 _, err = p.output.Write(data[m:n])
879 p.space.WriteByte(b) // WriteByte returns no errors
881 _, err = p.output.Write(data[m:n])
884 _, err = p.output.Write(newlines[0:1]) // write newline
885 case tabwriter.Escape:
886 _, err = p.output.Write(data[m:n])
888 m = n + 1 // +1: skip tabwriter.Escape
900 case inEscape, inText:
901 _, err = p.output.Write(data[m:n])
909 // ----------------------------------------------------------------------------
912 // General printing is controlled with these Config.Mode flags.
914 RawFormat uint = 1 << iota // do not use a tabwriter; if set, UseSpaces is ignored
915 TabIndent // use tabs for indentation independent of UseSpaces
916 UseSpaces // use spaces instead of tabs for alignment
919 // A Config node controls the output of Fprint.
921 Mode uint // default: 0
922 Tabwidth int // default: 8
925 // fprint implements Fprint and takes a nodeSizes map for setting up the printer state.
926 func (cfg *Config) fprint(output io.Writer, fset *token.FileSet, node interface{}, nodeSizes map[ast.Node]int) (int, os.Error) {
927 // redirect output through a trimmer to eliminate trailing whitespace
928 // (Input to a tabwriter must be untrimmed since trailing tabs provide
929 // formatting information. The tabwriter could provide trimming
930 // functionality but no tabwriter is used when RawFormat is set.)
931 output = &trimmer{output: output}
933 // setup tabwriter if needed and redirect output
934 var tw *tabwriter.Writer
935 if cfg.Mode&RawFormat == 0 {
936 minwidth := cfg.Tabwidth
938 padchar := byte('\t')
939 if cfg.Mode&UseSpaces != 0 {
943 twmode := tabwriter.DiscardEmptyColumns
944 if cfg.Mode&TabIndent != 0 {
946 twmode |= tabwriter.TabIndent
949 tw = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode)
953 // setup printer and print node
955 p.init(output, cfg, fset, nodeSizes)
957 switch n := node.(type) {
959 p.useNodeComments = true
960 p.expr(n, ignoreMultiLine)
962 p.useNodeComments = true
963 // A labeled statement will un-indent to position the
964 // label. Set indent to 1 so we don't get indent "underflow".
965 if _, labeledStmt := n.(*ast.LabeledStmt); labeledStmt {
968 p.stmt(n, false, ignoreMultiLine)
970 p.useNodeComments = true
971 p.decl(n, ignoreMultiLine)
973 p.useNodeComments = true
974 p.spec(n, 1, false, ignoreMultiLine)
976 p.comments = n.Comments
977 p.useNodeComments = n.Comments == nil
980 p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n)
983 p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
984 p.errors <- nil // no errors
986 err := <-p.errors // wait for completion of goroutine
988 // flush tabwriter, if any
990 tw.Flush() // ignore errors
993 return p.written, err
996 // Fprint "pretty-prints" an AST node to output and returns the number
997 // of bytes written and an error (if any) for a given configuration cfg.
998 // Position information is interpreted relative to the file set fset.
999 // The node type must be *ast.File, or assignment-compatible to ast.Expr,
1000 // ast.Decl, ast.Spec, or ast.Stmt.
1002 func (cfg *Config) Fprint(output io.Writer, fset *token.FileSet, node interface{}) (int, os.Error) {
1003 return cfg.fprint(output, fset, node, make(map[ast.Node]int))
1006 // Fprint "pretty-prints" an AST node to output.
1007 // It calls Config.Fprint with default settings.
1009 func Fprint(output io.Writer, fset *token.FileSet, node interface{}) os.Error {
1010 _, err := (&Config{Tabwidth: 8}).Fprint(output, fset, node) // don't care about number of bytes written