1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Godoc comment extraction and comment -> HTML formatting.
13 "text/template" // for HTMLEscape
19 ldquo = []byte("“")
20 rdquo = []byte("”")
23 // Escape comment text for HTML. If nice is set,
24 // also turn `` into “ and '' into ”.
25 func commentEscape(w io.Writer, text string, nice bool) {
28 for i := 0; i < len(text)-1; i++ {
30 if ch == text[i+1] && (ch == '`' || ch == '\'') {
31 template.HTMLEscape(w, []byte(text[last:i]))
39 i++ // loop will add one more
43 template.HTMLEscape(w, []byte(text[last:]))
47 // Regexp for Go identifiers
48 identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
51 protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
52 hostPart = `[a-zA-Z0-9_@\-]+`
53 filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
54 urlRx = protocol + `//` + // http://
55 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
56 filePart + `([:.,]` + filePart + `)*`
59 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
62 html_a = []byte(`<a href="`)
63 html_aq = []byte(`">`)
64 html_enda = []byte("</a>")
65 html_i = []byte("<i>")
66 html_endi = []byte("</i>")
67 html_p = []byte("<p>\n")
68 html_endp = []byte("</p>\n")
69 html_pre = []byte("<pre>")
70 html_endpre = []byte("</pre>\n")
71 html_h = []byte(`<h3 id="`)
72 html_hq = []byte(`">`)
73 html_endh = []byte("</h3>\n")
76 // Emphasize and escape a line of text for HTML. URLs are converted into links;
77 // if the URL also appears in the words map, the link is taken from the map (if
78 // the corresponding map value is the empty string, the URL is not converted
79 // into a link). Go identifiers that appear in the words map are italicized; if
80 // the corresponding map value is not the empty string, it is considered a URL
81 // and the word is converted into a link. If nice is set, the remaining text's
82 // appearance is improved where it makes sense (e.g., `` is turned into “
83 // and '' into ”).
84 func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
86 m := matchRx.FindStringSubmatchIndex(line)
90 // len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
92 // write text before match
93 commentEscape(w, line[0:m[0]], nice)
96 match := line[m[0]:m[1]]
100 url, italics = words[string(match)]
103 // match against first parenthesized sub-regexp; it must be a match against urlRx
105 // no alternative URL in words list, use match instead
108 italics = false // don't italicize URLs
114 template.HTMLEscape(w, []byte(url))
120 commentEscape(w, match, nice)
131 commentEscape(w, line, nice)
134 func indentLen(s string) int {
136 for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
142 func isBlank(s string) bool {
143 return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
146 func commonPrefix(a, b string) string {
148 for i < len(a) && i < len(b) && a[i] == b[i] {
154 func unindent(block []string) {
159 // compute the longest common whitespace prefix
160 prefix := block[0][0:indentLen(block[0])]
161 for _, line := range block {
163 prefix = commonPrefix(prefix, line[0:indentLen(line)])
169 for i, line := range block {
176 // heading returns the trimmed line if it passes as a section heading;
177 // otherwise it returns the empty string.
178 func heading(line string) string {
179 line = strings.TrimSpace(line)
184 // a heading must start with an uppercase letter
185 r, _ := utf8.DecodeRuneInString(line)
186 if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
190 // it must end in a letter or digit:
191 r, _ = utf8.DecodeLastRuneInString(line)
192 if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
196 // exclude lines with illegal characters
197 if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
201 // allow "'" for possessive "'s" only
203 i := strings.IndexRune(b, '\'')
207 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
208 return "" // not followed by "s "
229 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
231 func anchorID(line string) string {
232 return nonAlphaNumRx.ReplaceAllString(line, "_")
235 // ToHTML converts comment text to formatted HTML.
236 // The comment was prepared by DocReader,
237 // so it is known to have neither leading nor trailing blank lines,
238 // nor trailing spaces at the end of lines.
239 // The comment markers have already been removed.
241 // Turn each run of multiple \n into </p><p>.
242 // Turn each run of indented lines into a <pre> block without indent.
243 // Enclose headings with header tags.
245 // URLs in the comment text are converted into links; if the URL also appears
246 // in the words map, the link is taken from the map (if the corresponding map
247 // value is the empty string, the URL is not converted into a link).
249 // Go identifiers that appear in the words map are italicized; if the corresponding
250 // map value is not the empty string, it is considered a URL and the word is converted
252 func ToHTML(w io.Writer, text string, words map[string]string) {
253 for _, b := range blocks(text) {
257 for _, line := range b.lines {
258 emphasize(w, line, words, true)
264 for _, line := range b.lines {
270 commentEscape(w, line, true)
278 for _, line := range b.lines {
279 emphasize(w, line, nil, false)
286 func blocks(text string) []block {
292 lastWasHeading = false
297 out = append(out, block{opPara, para})
302 lines := strings.SplitAfter(text, "\n")
304 for i := 0; i < len(lines); {
313 if indentLen(line) > 0 {
317 // count indented or blank lines
319 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
322 // but not trailing blank lines
323 for j > i && isBlank(lines[j-1]) {
331 // put those lines in a pre block
332 out = append(out, block{opPre, pre})
333 lastWasHeading = false
337 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
338 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
339 // current line is non-blank, surrounded by blank lines
340 // and the next non-blank line is not indented: this
341 // might be a heading.
342 if head := heading(line); head != "" {
344 out = append(out, block{opHead, []string{head}})
346 lastWasHeading = true
353 lastWasHeading = false
354 para = append(para, lines[i])
362 // ToText prepares comment text for presentation in textual output.
363 // It wraps paragraphs of text to width or fewer Unicode code points
364 // and then prefixes each line with the indent. In preformatted sections
365 // (such as program text), it prefixes each non-blank line with preIndent.
366 func ToText(w io.Writer, text string, indent, preIndent string, width int) {
372 for _, b := range blocks(text) {
375 // l.write will add leading newline if required
376 for _, line := range b.lines {
382 for _, line := range b.lines {
388 for _, line := range b.lines {
390 w.Write([]byte(preIndent))
391 w.Write([]byte(line))
398 type lineWrapper struct {
407 var nl = []byte("\n")
408 var space = []byte(" ")
410 func (l *lineWrapper) write(text string) {
411 if l.n == 0 && l.printed {
412 l.out.Write(nl) // blank line before new paragraph
416 for _, f := range strings.Fields(text) {
417 w := utf8.RuneCountInString(f)
418 // wrap if line is too long
419 if l.n > 0 && l.n+l.pendSpace+w > l.width {
425 l.out.Write([]byte(l.indent))
427 l.out.Write(space[:l.pendSpace])
428 l.out.Write([]byte(f))
429 l.n += l.pendSpace + w
434 func (l *lineWrapper) flush() {