Source file src/go/doc/comment.go

Documentation: go/doc

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Godoc comment extraction and comment -> HTML formatting.
     6  
     7  package doc
     8  
     9  import (
    10  	"bytes"
    11  	"io"
    12  	"strings"
    13  	"text/template" // for HTMLEscape
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  const (
    19  	ldquo = "“"
    20  	rdquo = "”"
    21  	ulquo = "“"
    22  	urquo = "”"
    23  )
    24  
    25  var (
    26  	htmlQuoteReplacer    = strings.NewReplacer(ulquo, ldquo, urquo, rdquo)
    27  	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
    28  )
    29  
    30  // Escape comment text for HTML. If nice is set,
    31  // also turn `` into “ and '' into ”.
    32  func commentEscape(w io.Writer, text string, nice bool) {
    33  	if nice {
    34  		// In the first pass, we convert `` and '' into their unicode equivalents.
    35  		// This prevents them from being escaped in HTMLEscape.
    36  		text = convertQuotes(text)
    37  		var buf bytes.Buffer
    38  		template.HTMLEscape(&buf, []byte(text))
    39  		// Now we convert the unicode quotes to their HTML escaped entities to maintain old behavior.
    40  		// We need to use a temp buffer to read the string back and do the conversion,
    41  		// otherwise HTMLEscape will escape & to &
    42  		htmlQuoteReplacer.WriteString(w, buf.String())
    43  		return
    44  	}
    45  	template.HTMLEscape(w, []byte(text))
    46  }
    47  
    48  func convertQuotes(text string) string {
    49  	return unicodeQuoteReplacer.Replace(text)
    50  }
    51  
// Regular expression fragments used to find identifiers and URLs in
// comment text. They are combined into matchRx.
const (
	// Regexp for Go identifiers: a letter or underscore followed by any
	// number of letters, underscores, or ASCII digits.
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	// Each repetition is a run of punctuation followed by one
	// non-punctuation character, so a match never ends in .,:;?!
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// complete URL: protocol, "://", host, and the optional path
	urlRx = protoPart + `://` + hostPart + pathPart
)
    71  
// matchRx matches either a URL (first parenthesized group) or a Go
// identifier (last parenthesized group).
var matchRx = newLazyRE(`(` + urlRx + `)|(` + identRx + `)`)
    73  
// HTML fragments written around the pieces of a formatted comment
// by emphasize and ToHTML.
var (
	html_a      = []byte(`<a href="`) // opens a link; the escaped URL follows
	html_aq     = []byte(`">`)        // closes the href attribute and the opening tag
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`) // opens a heading; the anchor ID follows
	html_hq     = []byte(`">`)       // closes the id attribute and the opening tag
	html_endh   = []byte("</h3>\n")
)
    88  
    89  // Emphasize and escape a line of text for HTML. URLs are converted into links;
    90  // if the URL also appears in the words map, the link is taken from the map (if
    91  // the corresponding map value is the empty string, the URL is not converted
    92  // into a link). Go identifiers that appear in the words map are italicized; if
    93  // the corresponding map value is not the empty string, it is considered a URL
    94  // and the word is converted into a link. If nice is set, the remaining text's
    95  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
    96  // and '' into &rdquo;).
    97  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
    98  	for {
    99  		m := matchRx.FindStringSubmatchIndex(line)
   100  		if m == nil {
   101  			break
   102  		}
   103  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   104  
   105  		// write text before match
   106  		commentEscape(w, line[0:m[0]], nice)
   107  
   108  		// adjust match for URLs
   109  		match := line[m[0]:m[1]]
   110  		if strings.Contains(match, "://") {
   111  			m0, m1 := m[0], m[1]
   112  			for _, s := range []string{"()", "{}", "[]"} {
   113  				open, close := s[:1], s[1:] // E.g., "(" and ")"
   114  				// require opening parentheses before closing parentheses (#22285)
   115  				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
   116  					m1 = m0 + i
   117  					match = line[m0:m1]
   118  				}
   119  				// require balanced pairs of parentheses (#5043)
   120  				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
   121  					m1 = strings.LastIndexAny(line[:m1], s)
   122  					match = line[m0:m1]
   123  				}
   124  			}
   125  			if m1 != m[1] {
   126  				// redo matching with shortened line for correct indices
   127  				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
   128  			}
   129  		}
   130  
   131  		// analyze match
   132  		url := ""
   133  		italics := false
   134  		if words != nil {
   135  			url, italics = words[match]
   136  		}
   137  		if m[2] >= 0 {
   138  			// match against first parenthesized sub-regexp; must be match against urlRx
   139  			if !italics {
   140  				// no alternative URL in words list, use match instead
   141  				url = match
   142  			}
   143  			italics = false // don't italicize URLs
   144  		}
   145  
   146  		// write match
   147  		if len(url) > 0 {
   148  			w.Write(html_a)
   149  			template.HTMLEscape(w, []byte(url))
   150  			w.Write(html_aq)
   151  		}
   152  		if italics {
   153  			w.Write(html_i)
   154  		}
   155  		commentEscape(w, match, nice)
   156  		if italics {
   157  			w.Write(html_endi)
   158  		}
   159  		if len(url) > 0 {
   160  			w.Write(html_enda)
   161  		}
   162  
   163  		// advance
   164  		line = line[m[1]:]
   165  	}
   166  	commentEscape(w, line, nice)
   167  }
   168  
   169  func indentLen(s string) int {
   170  	i := 0
   171  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   172  		i++
   173  	}
   174  	return i
   175  }
   176  
   177  func isBlank(s string) bool {
   178  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   179  }
   180  
   181  func commonPrefix(a, b string) string {
   182  	i := 0
   183  	for i < len(a) && i < len(b) && a[i] == b[i] {
   184  		i++
   185  	}
   186  	return a[0:i]
   187  }
   188  
   189  func unindent(block []string) {
   190  	if len(block) == 0 {
   191  		return
   192  	}
   193  
   194  	// compute maximum common white prefix
   195  	prefix := block[0][0:indentLen(block[0])]
   196  	for _, line := range block {
   197  		if !isBlank(line) {
   198  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   199  		}
   200  	}
   201  	n := len(prefix)
   202  
   203  	// remove
   204  	for i, line := range block {
   205  		if !isBlank(line) {
   206  			block[i] = line[n:]
   207  		}
   208  	}
   209  }
   210  
   211  // heading returns the trimmed line if it passes as a section heading;
   212  // otherwise it returns the empty string.
   213  func heading(line string) string {
   214  	line = strings.TrimSpace(line)
   215  	if len(line) == 0 {
   216  		return ""
   217  	}
   218  
   219  	// a heading must start with an uppercase letter
   220  	r, _ := utf8.DecodeRuneInString(line)
   221  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   222  		return ""
   223  	}
   224  
   225  	// it must end in a letter or digit:
   226  	r, _ = utf8.DecodeLastRuneInString(line)
   227  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   228  		return ""
   229  	}
   230  
   231  	// exclude lines with illegal characters. we allow "(),"
   232  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
   233  		return ""
   234  	}
   235  
   236  	// allow "'" for possessive "'s" only
   237  	for b := line; ; {
   238  		i := strings.IndexRune(b, '\'')
   239  		if i < 0 {
   240  			break
   241  		}
   242  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   243  			return "" // not followed by "s "
   244  		}
   245  		b = b[i+2:]
   246  	}
   247  
   248  	// allow "." when followed by non-space
   249  	for b := line; ; {
   250  		i := strings.IndexRune(b, '.')
   251  		if i < 0 {
   252  			break
   253  		}
   254  		if i+1 >= len(b) || b[i+1] == ' ' {
   255  			return "" // not followed by non-space
   256  		}
   257  		b = b[i+1:]
   258  	}
   259  
   260  	return line
   261  }
   262  
// An op identifies the kind of a block of comment text.
type op int

const (
	opPara op = iota // paragraph of text
	opHead           // section heading
	opPre            // preformatted (indented) text
)
   270  
// A block is a run of comment lines that are all formatted the same
// way, as determined by op.
type block struct {
	op    op       // kind of block
	lines []string // the lines making up the block
}
   275  
// nonAlphaNumRx matches any single character that is not an ASCII
// letter or digit.
var nonAlphaNumRx = newLazyRE(`[^a-zA-Z0-9]`)
   277  
   278  func anchorID(line string) string {
   279  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
   280  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
   281  }
   282  
   283  // ToHTML converts comment text to formatted HTML.
   284  // The comment was prepared by DocReader,
   285  // so it is known not to have leading, trailing blank lines
   286  // nor to have trailing spaces at the end of lines.
   287  // The comment markers have already been removed.
   288  //
   289  // Each span of unindented non-blank lines is converted into
   290  // a single paragraph. There is one exception to the rule: a span that
   291  // consists of a single line, is followed by another paragraph span,
   292  // begins with a capital letter, and contains no punctuation
   293  // other than parentheses and commas is formatted as a heading.
   294  //
   295  // A span of indented lines is converted into a <pre> block,
   296  // with the common indent prefix removed.
   297  //
   298  // URLs in the comment text are converted into links; if the URL also appears
   299  // in the words map, the link is taken from the map (if the corresponding map
   300  // value is the empty string, the URL is not converted into a link).
   301  //
   302  // Go identifiers that appear in the words map are italicized; if the corresponding
   303  // map value is not the empty string, it is considered a URL and the word is converted
   304  // into a link.
   305  func ToHTML(w io.Writer, text string, words map[string]string) {
   306  	for _, b := range blocks(text) {
   307  		switch b.op {
   308  		case opPara:
   309  			w.Write(html_p)
   310  			for _, line := range b.lines {
   311  				emphasize(w, line, words, true)
   312  			}
   313  			w.Write(html_endp)
   314  		case opHead:
   315  			w.Write(html_h)
   316  			id := ""
   317  			for _, line := range b.lines {
   318  				if id == "" {
   319  					id = anchorID(line)
   320  					w.Write([]byte(id))
   321  					w.Write(html_hq)
   322  				}
   323  				commentEscape(w, line, true)
   324  			}
   325  			if id == "" {
   326  				w.Write(html_hq)
   327  			}
   328  			w.Write(html_endh)
   329  		case opPre:
   330  			w.Write(html_pre)
   331  			for _, line := range b.lines {
   332  				emphasize(w, line, nil, false)
   333  			}
   334  			w.Write(html_endpre)
   335  		}
   336  	}
   337  }
   338  
   339  func blocks(text string) []block {
   340  	var (
   341  		out  []block
   342  		para []string
   343  
   344  		lastWasBlank   = false
   345  		lastWasHeading = false
   346  	)
   347  
   348  	close := func() {
   349  		if para != nil {
   350  			out = append(out, block{opPara, para})
   351  			para = nil
   352  		}
   353  	}
   354  
   355  	lines := strings.SplitAfter(text, "\n")
   356  	unindent(lines)
   357  	for i := 0; i < len(lines); {
   358  		line := lines[i]
   359  		if isBlank(line) {
   360  			// close paragraph
   361  			close()
   362  			i++
   363  			lastWasBlank = true
   364  			continue
   365  		}
   366  		if indentLen(line) > 0 {
   367  			// close paragraph
   368  			close()
   369  
   370  			// count indented or blank lines
   371  			j := i + 1
   372  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   373  				j++
   374  			}
   375  			// but not trailing blank lines
   376  			for j > i && isBlank(lines[j-1]) {
   377  				j--
   378  			}
   379  			pre := lines[i:j]
   380  			i = j
   381  
   382  			unindent(pre)
   383  
   384  			// put those lines in a pre block
   385  			out = append(out, block{opPre, pre})
   386  			lastWasHeading = false
   387  			continue
   388  		}
   389  
   390  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   391  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   392  			// current line is non-blank, surrounded by blank lines
   393  			// and the next non-blank line is not indented: this
   394  			// might be a heading.
   395  			if head := heading(line); head != "" {
   396  				close()
   397  				out = append(out, block{opHead, []string{head}})
   398  				i += 2
   399  				lastWasHeading = true
   400  				continue
   401  			}
   402  		}
   403  
   404  		// open paragraph
   405  		lastWasBlank = false
   406  		lastWasHeading = false
   407  		para = append(para, lines[i])
   408  		i++
   409  	}
   410  	close()
   411  
   412  	return out
   413  }
   414  
   415  // ToText prepares comment text for presentation in textual output.
   416  // It wraps paragraphs of text to width or fewer Unicode code points
   417  // and then prefixes each line with the indent. In preformatted sections
   418  // (such as program text), it prefixes each non-blank line with preIndent.
   419  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   420  	l := lineWrapper{
   421  		out:    w,
   422  		width:  width,
   423  		indent: indent,
   424  	}
   425  	for _, b := range blocks(text) {
   426  		switch b.op {
   427  		case opPara:
   428  			// l.write will add leading newline if required
   429  			for _, line := range b.lines {
   430  				line = convertQuotes(line)
   431  				l.write(line)
   432  			}
   433  			l.flush()
   434  		case opHead:
   435  			w.Write(nl)
   436  			for _, line := range b.lines {
   437  				line = convertQuotes(line)
   438  				l.write(line + "\n")
   439  			}
   440  			l.flush()
   441  		case opPre:
   442  			w.Write(nl)
   443  			for _, line := range b.lines {
   444  				if isBlank(line) {
   445  					w.Write([]byte("\n"))
   446  				} else {
   447  					w.Write([]byte(preIndent))
   448  					line = convertQuotes(line)
   449  					w.Write([]byte(line))
   450  				}
   451  			}
   452  		}
   453  	}
   454  }
   455  
   456  type lineWrapper struct {
   457  	out       io.Writer
   458  	printed   bool
   459  	width     int
   460  	indent    string
   461  	n         int
   462  	pendSpace int
   463  }
   464  
   465  var nl = []byte("\n")
   466  var space = []byte(" ")
   467  
   468  func (l *lineWrapper) write(text string) {
   469  	if l.n == 0 && l.printed {
   470  		l.out.Write(nl) // blank line before new paragraph
   471  	}
   472  	l.printed = true
   473  
   474  	for _, f := range strings.Fields(text) {
   475  		w := utf8.RuneCountInString(f)
   476  		// wrap if line is too long
   477  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
   478  			l.out.Write(nl)
   479  			l.n = 0
   480  			l.pendSpace = 0
   481  		}
   482  		if l.n == 0 {
   483  			l.out.Write([]byte(l.indent))
   484  		}
   485  		l.out.Write(space[:l.pendSpace])
   486  		l.out.Write([]byte(f))
   487  		l.n += l.pendSpace + w
   488  		l.pendSpace = 1
   489  	}
   490  }
   491  
   492  func (l *lineWrapper) flush() {
   493  	if l.n == 0 {
   494  		return
   495  	}
   496  	l.out.Write(nl)
   497  	l.pendSpace = 0
   498  	l.n = 0
   499  }
   500  

View as plain text