libgo/go/go/doc/comment.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Godoc comment extraction and comment -> HTML formatting.
   6
   7 package doc
   8
   9 import (
  10         "io"
  11         "regexp"
  12         "strings"
  13         "text/template" // for HTMLEscape
  14         "unicode"
  15         "unicode/utf8"
  16 )
  17
  18 var (
  19         ldquo = []byte("&ldquo;")
  20         rdquo = []byte("&rdquo;")
  21 )
  22
  23 // Escape comment text for HTML. If nice is set,
  24 // also turn `` into &ldquo; and '' into &rdquo;.
  25 func commentEscape(w io.Writer, text string, nice bool) {
  26         last := 0
  27         if nice {
  28                 for i := 0; i < len(text)-1; i++ {
  29                         ch := text[i]
  30                         if ch == text[i+1] && (ch == '`' || ch == '\'') {
  31                                 template.HTMLEscape(w, []byte(text[last:i]))
  32                                 last = i + 2
  33                                 switch ch {
  34                                 case '`':
  35                                         w.Write(ldquo)
  36                                 case '\'':
  37                                         w.Write(rdquo)
  38                                 }
  39                                 i++ // loop will add one more
  40                         }
  41                 }
  42         }
  43         template.HTMLEscape(w, []byte(text[last:]))
  44 }
  45
  46 const (
  47         // Regexp for Go identifiers
  48         identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
  49
  50         // Regexp for URLs
  51         protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
  52         hostPart = `[a-zA-Z0-9_@\-]+`
  53         filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
  54         urlRx    = protocol + `//` + // http://
  55                 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
  56                 filePart + `([:.,]` + filePart + `)*`
  57 )
  58
  59 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
  60
  61 var (
  62         html_a      = []byte(`<a href="`)
  63         html_aq     = []byte(`">`)
  64         html_enda   = []byte("</a>")
  65         html_i      = []byte("<i>")
  66         html_endi   = []byte("</i>")
  67         html_p      = []byte("<p>\n")
  68         html_endp   = []byte("</p>\n")
  69         html_pre    = []byte("<pre>")
  70         html_endpre = []byte("</pre>\n")
  71         html_h      = []byte(`<h3 id="`)
  72         html_hq     = []byte(`">`)
  73         html_endh   = []byte("</h3>\n")
  74 )
  75
  76 // Emphasize and escape a line of text for HTML. URLs are converted into links;
  77 // if the URL also appears in the words map, the link is taken from the map (if
  78 // the corresponding map value is the empty string, the URL is not converted
  79 // into a link). Go identifiers that appear in the words map are italicized; if
  80 // the corresponding map value is not the empty string, it is considered a URL
  81 // and the word is converted into a link. If nice is set, the remaining text's
  82 // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
  83 // and '' into &rdquo;).
  84 func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
  85         for {
  86                 m := matchRx.FindStringSubmatchIndex(line)
  87                 if m == nil {
  88                         break
  89                 }
  90                 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
  91
  92                 // write text before match
  93                 commentEscape(w, line[0:m[0]], nice)
  94
  95                 // analyze match
  96                 match := line[m[0]:m[1]]
  97                 url := ""
  98                 italics := false
  99                 if words != nil {
 100                         url, italics = words[string(match)]
 101                 }
 102                 if m[2] >= 0 {
 103                         // match against first parenthesized sub-regexp; must be match against urlRx
 104                         if !italics {
 105                                 // no alternative URL in words list, use match instead
 106                                 url = string(match)
 107                         }
 108                         italics = false // don't italicize URLs
 109                 }
 110
 111                 // write match
 112                 if len(url) > 0 {
 113                         w.Write(html_a)
 114                         template.HTMLEscape(w, []byte(url))
 115                         w.Write(html_aq)
 116                 }
 117                 if italics {
 118                         w.Write(html_i)
 119                 }
 120                 commentEscape(w, match, nice)
 121                 if italics {
 122                         w.Write(html_endi)
 123                 }
 124                 if len(url) > 0 {
 125                         w.Write(html_enda)
 126                 }
 127
 128                 // advance
 129                 line = line[m[1]:]
 130         }
 131         commentEscape(w, line, nice)
 132 }
 133
 134 func indentLen(s string) int {
 135         i := 0
 136         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 137                 i++
 138         }
 139         return i
 140 }
 141
 142 func isBlank(s string) bool {
 143         return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
 144 }
 145
 146 func commonPrefix(a, b string) string {
 147         i := 0
 148         for i < len(a) && i < len(b) && a[i] == b[i] {
 149                 i++
 150         }
 151         return a[0:i]
 152 }
 153
 154 func unindent(block []string) {
 155         if len(block) == 0 {
 156                 return
 157         }
 158
 159         // compute maximum common white prefix
 160         prefix := block[0][0:indentLen(block[0])]
 161         for _, line := range block {
 162                 if !isBlank(line) {
 163                         prefix = commonPrefix(prefix, line[0:indentLen(line)])
 164                 }
 165         }
 166         n := len(prefix)
 167
 168         // remove
 169         for i, line := range block {
 170                 if !isBlank(line) {
 171                         block[i] = line[n:]
 172                 }
 173         }
 174 }
 175
 176 // heading returns the trimmed line if it passes as a section heading;
 177 // otherwise it returns the empty string.
 178 func heading(line string) string {
 179         line = strings.TrimSpace(line)
 180         if len(line) == 0 {
 181                 return ""
 182         }
 183
 184         // a heading must start with an uppercase letter
 185         r, _ := utf8.DecodeRuneInString(line)
 186         if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
 187                 return ""
 188         }
 189
 190         // it must end in a letter or digit:
 191         r, _ = utf8.DecodeLastRuneInString(line)
 192         if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
 193                 return ""
 194         }
 195
 196         // exclude lines with illegal characters
 197         if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
 198                 return ""
 199         }
 200
 201         // allow "'" for possessive "'s" only
 202         for b := line; ; {
 203                 i := strings.IndexRune(b, '\'')
 204                 if i < 0 {
 205                         break
 206                 }
 207                 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
 208                         return "" // not followed by "s "
 209                 }
 210                 b = b[i+2:]
 211         }
 212
 213         return line
 214 }
 215
 216 type op int
 217
 218 const (
 219         opPara op = iota
 220         opHead
 221         opPre
 222 )
 223
 224 type block struct {
 225         op    op
 226         lines []string
 227 }
 228
 229 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
 230
 231 func anchorID(line string) string {
 232         return nonAlphaNumRx.ReplaceAllString(line, "_")
 233 }
 234
 235 // ToHTML converts comment text to formatted HTML.
 236 // The comment was prepared by DocReader,
 237 // so it is known not to have leading, trailing blank lines
 238 // nor to have trailing spaces at the end of lines.
 239 // The comment markers have already been removed.
 240 //
 241 // Turn each run of multiple \n into </p><p>.
 242 // Turn each run of indented lines into a <pre> block without indent.
 243 // Enclose headings with header tags.
 244 //
 245 // URLs in the comment text are converted into links; if the URL also appears
 246 // in the words map, the link is taken from the map (if the corresponding map
 247 // value is the empty string, the URL is not converted into a link).
 248 //
 249 // Go identifiers that appear in the words map are italicized; if the corresponding
 250 // map value is not the empty string, it is considered a URL and the word is converted
 251 // into a link.
 252 func ToHTML(w io.Writer, text string, words map[string]string) {
 253         for _, b := range blocks(text) {
 254                 switch b.op {
 255                 case opPara:
 256                         w.Write(html_p)
 257                         for _, line := range b.lines {
 258                                 emphasize(w, line, words, true)
 259                         }
 260                         w.Write(html_endp)
 261                 case opHead:
 262                         w.Write(html_h)
 263                         id := ""
 264                         for _, line := range b.lines {
 265                                 if id == "" {
 266                                         id = anchorID(line)
 267                                         w.Write([]byte(id))
 268                                         w.Write(html_hq)
 269                                 }
 270                                 commentEscape(w, line, true)
 271                         }
 272                         if id == "" {
 273                                 w.Write(html_hq)
 274                         }
 275                         w.Write(html_endh)
 276                 case opPre:
 277                         w.Write(html_pre)
 278                         for _, line := range b.lines {
 279                                 emphasize(w, line, nil, false)
 280                         }
 281                         w.Write(html_endpre)
 282                 }
 283         }
 284 }
 285
 286 func blocks(text string) []block {
 287         var (
 288                 out  []block
 289                 para []string
 290
 291                 lastWasBlank   = false
 292                 lastWasHeading = false
 293         )
 294
 295         close := func() {
 296                 if para != nil {
 297                         out = append(out, block{opPara, para})
 298                         para = nil
 299                 }
 300         }
 301
 302         lines := strings.SplitAfter(text, "\n")
 303         unindent(lines)
 304         for i := 0; i < len(lines); {
 305                 line := lines[i]
 306                 if isBlank(line) {
 307                         // close paragraph
 308                         close()
 309                         i++
 310                         lastWasBlank = true
 311                         continue
 312                 }
 313                 if indentLen(line) > 0 {
 314                         // close paragraph
 315                         close()
 316
 317                         // count indented or blank lines
 318                         j := i + 1
 319                         for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
 320                                 j++
 321                         }
 322                         // but not trailing blank lines
 323                         for j > i && isBlank(lines[j-1]) {
 324                                 j--
 325                         }
 326                         pre := lines[i:j]
 327                         i = j
 328
 329                         unindent(pre)
 330
 331                         // put those lines in a pre block
 332                         out = append(out, block{opPre, pre})
 333                         lastWasHeading = false
 334                         continue
 335                 }
 336
 337                 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
 338                         isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
 339                         // current line is non-blank, surrounded by blank lines
 340                         // and the next non-blank line is not indented: this
 341                         // might be a heading.
 342                         if head := heading(line); head != "" {
 343                                 close()
 344                                 out = append(out, block{opHead, []string{head}})
 345                                 i += 2
 346                                 lastWasHeading = true
 347                                 continue
 348                         }
 349                 }
 350
 351                 // open paragraph
 352                 lastWasBlank = false
 353                 lastWasHeading = false
 354                 para = append(para, lines[i])
 355                 i++
 356         }
 357         close()
 358
 359         return out
 360 }
 361
 362 // ToText prepares comment text for presentation in textual output.
 363 // It wraps paragraphs of text to width or fewer Unicode code points
 364 // and then prefixes each line with the indent.  In preformatted sections
 365 // (such as program text), it prefixes each non-blank line with preIndent.
 366 func ToText(w io.Writer, text string, indent, preIndent string, width int) {
 367         l := lineWrapper{
 368                 out:    w,
 369                 width:  width,
 370                 indent: indent,
 371         }
 372         for _, b := range blocks(text) {
 373                 switch b.op {
 374                 case opPara:
 375                         // l.write will add leading newline if required
 376                         for _, line := range b.lines {
 377                                 l.write(line)
 378                         }
 379                         l.flush()
 380                 case opHead:
 381                         w.Write(nl)
 382                         for _, line := range b.lines {
 383                                 l.write(line + "\n")
 384                         }
 385                         l.flush()
 386                 case opPre:
 387                         w.Write(nl)
 388                         for _, line := range b.lines {
 389                                 if !isBlank(line) {
 390                                         w.Write([]byte(preIndent))
 391                                         w.Write([]byte(line))
 392                                 }
 393                         }
 394                 }
 395         }
 396 }
 397
 398 type lineWrapper struct {
 399         out       io.Writer
 400         printed   bool
 401         width     int
 402         indent    string
 403         n         int
 404         pendSpace int
 405 }
 406
 407 var nl = []byte("\n")
 408 var space = []byte(" ")
 409
 410 func (l *lineWrapper) write(text string) {
 411         if l.n == 0 && l.printed {
 412                 l.out.Write(nl) // blank line before new paragraph
 413         }
 414         l.printed = true
 415
 416         for _, f := range strings.Fields(text) {
 417                 w := utf8.RuneCountInString(f)
 418                 // wrap if line is too long
 419                 if l.n > 0 && l.n+l.pendSpace+w > l.width {
 420                         l.out.Write(nl)
 421                         l.n = 0
 422                         l.pendSpace = 0
 423                 }
 424                 if l.n == 0 {
 425                         l.out.Write([]byte(l.indent))
 426                 }
 427                 l.out.Write(space[:l.pendSpace])
 428                 l.out.Write([]byte(f))
 429                 l.n += l.pendSpace + w
 430                 l.pendSpace = 1
 431         }
 432 }
 433
 434 func (l *lineWrapper) flush() {
 435         if l.n == 0 {
 436                 return
 437         }
 438         l.out.Write(nl)
 439         l.pendSpace = 0
 440         l.n = 0
 441 }