...
Run Format

Source file src/go/doc/comment.go

Documentation: go/doc

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Godoc comment extraction and comment -> HTML formatting.
     6  
     7  package doc
     8  
     9  import (
    10  	"io"
    11  	"regexp"
    12  	"strings"
    13  	"text/template" // for HTMLEscape
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  var (
    19  	ldquo = []byte("“")
    20  	rdquo = []byte("”")
    21  )
    22  
    23  // Escape comment text for HTML. If nice is set,
    24  // also turn `` into “ and '' into ”.
    25  func commentEscape(w io.Writer, text string, nice bool) {
    26  	last := 0
    27  	if nice {
    28  		for i := 0; i < len(text)-1; i++ {
    29  			ch := text[i]
    30  			if ch == text[i+1] && (ch == '`' || ch == '\'') {
    31  				template.HTMLEscape(w, []byte(text[last:i]))
    32  				last = i + 2
    33  				switch ch {
    34  				case '`':
    35  					w.Write(ldquo)
    36  				case '\'':
    37  					w.Write(rdquo)
    38  				}
    39  				i++ // loop will add one more
    40  			}
    41  		}
    42  	}
    43  	template.HTMLEscape(w, []byte(text[last:]))
    44  }
    45  
// Regular expression sources used to find URLs and Go identifiers in
// comment text. They are combined into matchRx below.
const (
	// Regexp for Go identifiers: a letter or underscore followed by
	// any number of letters, underscores, or ASCII digits.
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs, assembled from the three parts below.
	// Match parens, and check in pairedParensPrefixLen for balance - see #5043
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid URLs ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	// (contains no parentheses)
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	// Punctuation from .,:;?! is accepted only when followed by another
	// path character, so it can never end the match.
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// complete URL: protocol, "://", host, and optional path
	urlRx = protoPart + `://` + hostPart + pathPart
)

// matchRx matches either a URL or a Go identifier. The URL alternative
// comes first: its outermost group is submatch 1, so index 2 of a
// FindStringSubmatchIndex result is non-negative exactly when the match
// is a URL.
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
    67  
// HTML fragments emitted by emphasize and ToHTML, kept as byte slices
// so they can be passed to io.Writer.Write directly.
var (
	html_a      = []byte(`<a href="`) // start of a link, up to the URL
	html_aq     = []byte(`">`)        // closes the href attribute and the opening <a> tag
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`) // start of a heading, up to the anchor ID
	html_hq     = []byte(`">`)       // closes the id attribute and the opening <h3> tag
	html_endh   = []byte("</h3>\n")
)
    82  
    83  // pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
    84  func pairedParensPrefixLen(s string) int {
    85  	parens := 0
    86  	l := len(s)
    87  	for i, ch := range s {
    88  		switch ch {
    89  		case '(':
    90  			if parens == 0 {
    91  				l = i
    92  			}
    93  			parens++
    94  		case ')':
    95  			parens--
    96  			if parens == 0 {
    97  				l = len(s)
    98  			} else if parens < 0 {
    99  				return i
   100  			}
   101  		}
   102  	}
   103  	return l
   104  }
   105  
   106  // Emphasize and escape a line of text for HTML. URLs are converted into links;
   107  // if the URL also appears in the words map, the link is taken from the map (if
   108  // the corresponding map value is the empty string, the URL is not converted
   109  // into a link). Go identifiers that appear in the words map are italicized; if
   110  // the corresponding map value is not the empty string, it is considered a URL
   111  // and the word is converted into a link. If nice is set, the remaining text's
   112  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
   113  // and '' into &rdquo;).
   114  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
   115  	for {
   116  		m := matchRx.FindStringSubmatchIndex(line)
   117  		if m == nil {
   118  			break
   119  		}
   120  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   121  
   122  		// write text before match
   123  		commentEscape(w, line[0:m[0]], nice)
   124  
   125  		// adjust match if necessary
   126  		match := line[m[0]:m[1]]
   127  		if n := pairedParensPrefixLen(match); n < len(match) {
   128  			// match contains unpaired parentheses (rare);
   129  			// redo matching with shortened line for correct indices
   130  			m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
   131  			match = match[:n]
   132  		}
   133  
   134  		// analyze match
   135  		url := ""
   136  		italics := false
   137  		if words != nil {
   138  			url, italics = words[match]
   139  		}
   140  		if m[2] >= 0 {
   141  			// match against first parenthesized sub-regexp; must be match against urlRx
   142  			if !italics {
   143  				// no alternative URL in words list, use match instead
   144  				url = match
   145  			}
   146  			italics = false // don't italicize URLs
   147  		}
   148  
   149  		// write match
   150  		if len(url) > 0 {
   151  			w.Write(html_a)
   152  			template.HTMLEscape(w, []byte(url))
   153  			w.Write(html_aq)
   154  		}
   155  		if italics {
   156  			w.Write(html_i)
   157  		}
   158  		commentEscape(w, match, nice)
   159  		if italics {
   160  			w.Write(html_endi)
   161  		}
   162  		if len(url) > 0 {
   163  			w.Write(html_enda)
   164  		}
   165  
   166  		// advance
   167  		line = line[m[1]:]
   168  	}
   169  	commentEscape(w, line, nice)
   170  }
   171  
   172  func indentLen(s string) int {
   173  	i := 0
   174  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   175  		i++
   176  	}
   177  	return i
   178  }
   179  
   180  func isBlank(s string) bool {
   181  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   182  }
   183  
   184  func commonPrefix(a, b string) string {
   185  	i := 0
   186  	for i < len(a) && i < len(b) && a[i] == b[i] {
   187  		i++
   188  	}
   189  	return a[0:i]
   190  }
   191  
   192  func unindent(block []string) {
   193  	if len(block) == 0 {
   194  		return
   195  	}
   196  
   197  	// compute maximum common white prefix
   198  	prefix := block[0][0:indentLen(block[0])]
   199  	for _, line := range block {
   200  		if !isBlank(line) {
   201  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   202  		}
   203  	}
   204  	n := len(prefix)
   205  
   206  	// remove
   207  	for i, line := range block {
   208  		if !isBlank(line) {
   209  			block[i] = line[n:]
   210  		}
   211  	}
   212  }
   213  
   214  // heading returns the trimmed line if it passes as a section heading;
   215  // otherwise it returns the empty string.
   216  func heading(line string) string {
   217  	line = strings.TrimSpace(line)
   218  	if len(line) == 0 {
   219  		return ""
   220  	}
   221  
   222  	// a heading must start with an uppercase letter
   223  	r, _ := utf8.DecodeRuneInString(line)
   224  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   225  		return ""
   226  	}
   227  
   228  	// it must end in a letter or digit:
   229  	r, _ = utf8.DecodeLastRuneInString(line)
   230  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   231  		return ""
   232  	}
   233  
   234  	// exclude lines with illegal characters. we allow "(),"
   235  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
   236  		return ""
   237  	}
   238  
   239  	// allow "'" for possessive "'s" only
   240  	for b := line; ; {
   241  		i := strings.IndexRune(b, '\'')
   242  		if i < 0 {
   243  			break
   244  		}
   245  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   246  			return "" // not followed by "s "
   247  		}
   248  		b = b[i+2:]
   249  	}
   250  
   251  	// allow "." when followed by non-space
   252  	for b := line;; {
   253  		i := strings.IndexRune(b, '.')
   254  		if i < 0 {
   255  			break
   256  		}
   257  		if i+1 >= len(b) || b[i+1] == ' ' {
   258  			return "" // not followed by non-space
   259  		}
   260  		b = b[i+1:]
   261  	}
   262  
   263  	return line
   264  }
   265  
// An op identifies the kind of a block produced by blocks.
type op int

const (
	opPara op = iota // paragraph of ordinary text
	opHead           // section heading
	opPre            // preformatted (indented) text
)

// A block is a unit of comment text: a run of lines together with the
// kind of formatting (op) to apply to them.
type block struct {
	op    op
	lines []string
}
   278  
   279  var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
   280  
   281  func anchorID(line string) string {
   282  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
   283  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
   284  }
   285  
   286  // ToHTML converts comment text to formatted HTML.
   287  // The comment was prepared by DocReader,
   288  // so it is known not to have leading, trailing blank lines
   289  // nor to have trailing spaces at the end of lines.
   290  // The comment markers have already been removed.
   291  //
   292  // Each span of unindented non-blank lines is converted into
   293  // a single paragraph. There is one exception to the rule: a span that
   294  // consists of a single line, is followed by another paragraph span,
   295  // begins with a capital letter, and contains no punctuation
   296  // other than parentheses and commas is formatted as a heading.
   297  //
   298  // A span of indented lines is converted into a <pre> block,
   299  // with the common indent prefix removed.
   300  //
   301  // URLs in the comment text are converted into links; if the URL also appears
   302  // in the words map, the link is taken from the map (if the corresponding map
   303  // value is the empty string, the URL is not converted into a link).
   304  //
   305  // Go identifiers that appear in the words map are italicized; if the corresponding
   306  // map value is not the empty string, it is considered a URL and the word is converted
   307  // into a link.
   308  func ToHTML(w io.Writer, text string, words map[string]string) {
   309  	for _, b := range blocks(text) {
   310  		switch b.op {
   311  		case opPara:
   312  			w.Write(html_p)
   313  			for _, line := range b.lines {
   314  				emphasize(w, line, words, true)
   315  			}
   316  			w.Write(html_endp)
   317  		case opHead:
   318  			w.Write(html_h)
   319  			id := ""
   320  			for _, line := range b.lines {
   321  				if id == "" {
   322  					id = anchorID(line)
   323  					w.Write([]byte(id))
   324  					w.Write(html_hq)
   325  				}
   326  				commentEscape(w, line, true)
   327  			}
   328  			if id == "" {
   329  				w.Write(html_hq)
   330  			}
   331  			w.Write(html_endh)
   332  		case opPre:
   333  			w.Write(html_pre)
   334  			for _, line := range b.lines {
   335  				emphasize(w, line, nil, false)
   336  			}
   337  			w.Write(html_endpre)
   338  		}
   339  	}
   340  }
   341  
// blocks splits comment text into a sequence of paragraph (opPara),
// heading (opHead), and preformatted (opPre) blocks.
//
// The text is split after each newline, so lines keep their trailing
// "\n", and the common indentation of the whole text is removed first.
// Blank lines separate blocks and are not part of paragraphs or headings;
// blank lines inside a run of indented lines stay in the opPre block.
func blocks(text string) []block {
	var (
		out  []block  // blocks collected so far
		para []string // lines of the paragraph currently being built

		lastWasBlank   = false // a blank line was seen since the last paragraph line
		lastWasHeading = false // the most recently emitted block was a heading
	)

	// close ends the paragraph in progress, if any, and appends it to out.
	close := func() {
		if para != nil {
			out = append(out, block{opPara, para})
			para = nil
		}
	}

	lines := strings.SplitAfter(text, "\n")
	unindent(lines)
	for i := 0; i < len(lines); {
		line := lines[i]
		if isBlank(line) {
			// close paragraph
			close()
			i++
			lastWasBlank = true
			continue
		}
		if indentLen(line) > 0 {
			// close paragraph
			close()

			// count indented or blank lines
			j := i + 1
			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
				j++
			}
			// but not trailing blank lines
			for j > i && isBlank(lines[j-1]) {
				j--
			}
			pre := lines[i:j]
			i = j

			// remove the indentation common to the preformatted lines
			// (pre shares its backing array with lines)
			unindent(pre)

			// put those lines in a pre block
			out = append(out, block{opPre, pre})
			lastWasHeading = false
			continue
		}

		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
			// current line is non-blank, surrounded by blank lines
			// and the next non-blank line is not indented: this
			// might be a heading.
			// (lastWasBlank starts out false, so the very first line
			// is never treated as a heading.)
			if head := heading(line); head != "" {
				close()
				out = append(out, block{opHead, []string{head}})
				i += 2 // skip the heading and the blank line after it
				lastWasHeading = true
				continue
			}
		}

		// open paragraph
		lastWasBlank = false
		lastWasHeading = false
		para = append(para, lines[i])
		i++
	}
	// flush the final paragraph, if any
	close()

	return out
}
   417  
   418  // ToText prepares comment text for presentation in textual output.
   419  // It wraps paragraphs of text to width or fewer Unicode code points
   420  // and then prefixes each line with the indent. In preformatted sections
   421  // (such as program text), it prefixes each non-blank line with preIndent.
   422  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   423  	l := lineWrapper{
   424  		out:    w,
   425  		width:  width,
   426  		indent: indent,
   427  	}
   428  	for _, b := range blocks(text) {
   429  		switch b.op {
   430  		case opPara:
   431  			// l.write will add leading newline if required
   432  			for _, line := range b.lines {
   433  				l.write(line)
   434  			}
   435  			l.flush()
   436  		case opHead:
   437  			w.Write(nl)
   438  			for _, line := range b.lines {
   439  				l.write(line + "\n")
   440  			}
   441  			l.flush()
   442  		case opPre:
   443  			w.Write(nl)
   444  			for _, line := range b.lines {
   445  				if isBlank(line) {
   446  					w.Write([]byte("\n"))
   447  				} else {
   448  					w.Write([]byte(preIndent))
   449  					w.Write([]byte(line))
   450  				}
   451  			}
   452  		}
   453  	}
   454  }
   455  
   456  type lineWrapper struct {
   457  	out       io.Writer
   458  	printed   bool
   459  	width     int
   460  	indent    string
   461  	n         int
   462  	pendSpace int
   463  }
   464  
   465  var nl = []byte("\n")
   466  var space = []byte(" ")
   467  
   468  func (l *lineWrapper) write(text string) {
   469  	if l.n == 0 && l.printed {
   470  		l.out.Write(nl) // blank line before new paragraph
   471  	}
   472  	l.printed = true
   473  
   474  	for _, f := range strings.Fields(text) {
   475  		w := utf8.RuneCountInString(f)
   476  		// wrap if line is too long
   477  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
   478  			l.out.Write(nl)
   479  			l.n = 0
   480  			l.pendSpace = 0
   481  		}
   482  		if l.n == 0 {
   483  			l.out.Write([]byte(l.indent))
   484  		}
   485  		l.out.Write(space[:l.pendSpace])
   486  		l.out.Write([]byte(f))
   487  		l.n += l.pendSpace + w
   488  		l.pendSpace = 1
   489  	}
   490  }
   491  
   492  func (l *lineWrapper) flush() {
   493  	if l.n == 0 {
   494  		return
   495  	}
   496  	l.out.Write(nl)
   497  	l.pendSpace = 0
   498  	l.n = 0
   499  }
   500  

View as plain text