...
Run Format

Source file src/go/doc/comment.go

Documentation: go/doc

  // Copyright 2009 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  // Godoc comment extraction and comment -> HTML formatting.
  
  package doc
  
  import (
  	"io"
  	"regexp"
  	"strings"
  	"text/template" // for HTMLEscape
  	"unicode"
  	"unicode/utf8"
  )
  
  var (
  	ldquo = []byte("“")
  	rdquo = []byte("”")
  )
  
  // Escape comment text for HTML. If nice is set,
  // also turn `` into “ and '' into ”.
  func commentEscape(w io.Writer, text string, nice bool) {
  	last := 0
  	if nice {
  		for i := 0; i < len(text)-1; i++ {
  			ch := text[i]
  			if ch == text[i+1] && (ch == '`' || ch == '\'') {
  				template.HTMLEscape(w, []byte(text[last:i]))
  				last = i + 2
  				switch ch {
  				case '`':
  					w.Write(ldquo)
  				case '\'':
  					w.Write(rdquo)
  				}
  				i++ // loop will add one more
  			}
  		}
  	}
  	template.HTMLEscape(w, []byte(text[last:]))
  }
  
  const (
  	// Regexp for Go identifiers: a letter or underscore followed by any
  	// number of letters, underscores, or ASCII digits.
  	identRx = `[\pL_][\pL_0-9]*`
  
  	// Regexp for URLs, assembled below from protoPart, hostPart and pathPart.
  	// Match parens, and check in pairedParensPrefixLen for balance - see #5043
  	// Match .,:;?! within path, but not at end - see #18139, #16565
  	// This excludes some rare yet valid URLs ending in common punctuation
  	// in order to allow sentences ending in URLs.
  
  	// protocol (required) e.g. http
  	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
  	// host (required) e.g. www.example.com or [::1]:8080
  	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
  	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar;
  	// punctuation from .,:;?! is only accepted when another path
  	// character follows it.
  	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
  
  	// complete URL pattern: protocol, "://", host, optional path
  	urlRx = protoPart + `://` + hostPart + pathPart
  )
  
  // matchRx matches either a URL or a Go identifier. The URL alternative
  // comes first, so the first parenthesized group is set exactly when the
  // match is a URL.
  var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
  
  // HTML fragments written by emphasize and ToHTML.
  var (
  	html_a      = []byte(`<a href="`) // start of a link, up to the URL
  	html_aq     = []byte(`">`)        // closes the href attribute and the <a> tag
  	html_enda   = []byte("</a>")
  	html_i      = []byte("<i>")
  	html_endi   = []byte("</i>")
  	html_p      = []byte("<p>\n")
  	html_endp   = []byte("</p>\n")
  	html_pre    = []byte("<pre>")
  	html_endpre = []byte("</pre>\n")
  	html_h      = []byte(`<h3 id="`) // start of a heading, up to the anchor id
  	html_hq     = []byte(`">`)       // closes the id attribute and the <h3> tag
  	html_endh   = []byte("</h3>\n")
  )
  
  // pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
  func pairedParensPrefixLen(s string) int {
  	parens := 0
  	l := len(s)
  	for i, ch := range s {
  		switch ch {
  		case '(':
  			if parens == 0 {
  				l = i
  			}
  			parens++
  		case ')':
  			parens--
  			if parens == 0 {
  				l = len(s)
  			} else if parens < 0 {
  				return i
  			}
  		}
  	}
  	return l
  }
  
  // Emphasize and escape a line of text for HTML. URLs are converted into links;
  // if the URL also appears in the words map, the link is taken from the map (if
  // the corresponding map value is the empty string, the URL is not converted
  // into a link). Go identifiers that appear in the words map are italicized; if
  // the corresponding map value is not the empty string, it is considered a URL
  // and the word is converted into a link. If nice is set, the remaining text's
  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
  // and '' into &rdquo;).
  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
  	for {
  		m := matchRx.FindStringSubmatchIndex(line)
  		if m == nil {
  			break
  		}
  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
  
  		// write text before match
  		commentEscape(w, line[0:m[0]], nice)
  
  		// adjust match if necessary
  		match := line[m[0]:m[1]]
  		if n := pairedParensPrefixLen(match); n < len(match) {
  			// match contains unpaired parentheses (rare);
  			// redo matching with shortened line for correct indices
  			m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
  			match = match[:n]
  		}
  
  		// analyze match
  		url := ""
  		italics := false
  		if words != nil {
  			url, italics = words[match]
  		}
  		if m[2] >= 0 {
  			// match against first parenthesized sub-regexp; must be match against urlRx
  			if !italics {
  				// no alternative URL in words list, use match instead
  				url = match
  			}
  			italics = false // don't italicize URLs
  		}
  
  		// write match
  		if len(url) > 0 {
  			w.Write(html_a)
  			template.HTMLEscape(w, []byte(url))
  			w.Write(html_aq)
  		}
  		if italics {
  			w.Write(html_i)
  		}
  		commentEscape(w, match, nice)
  		if italics {
  			w.Write(html_endi)
  		}
  		if len(url) > 0 {
  			w.Write(html_enda)
  		}
  
  		// advance
  		line = line[m[1]:]
  	}
  	commentEscape(w, line, nice)
  }
  
  func indentLen(s string) int {
  	i := 0
  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
  		i++
  	}
  	return i
  }
  
  func isBlank(s string) bool {
  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
  }
  
  func commonPrefix(a, b string) string {
  	i := 0
  	for i < len(a) && i < len(b) && a[i] == b[i] {
  		i++
  	}
  	return a[0:i]
  }
  
  func unindent(block []string) {
  	if len(block) == 0 {
  		return
  	}
  
  	// compute maximum common white prefix
  	prefix := block[0][0:indentLen(block[0])]
  	for _, line := range block {
  		if !isBlank(line) {
  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
  		}
  	}
  	n := len(prefix)
  
  	// remove
  	for i, line := range block {
  		if !isBlank(line) {
  			block[i] = line[n:]
  		}
  	}
  }
  
  // heading returns the trimmed line if it passes as a section heading;
  // otherwise it returns the empty string.
  func heading(line string) string {
  	line = strings.TrimSpace(line)
  	if len(line) == 0 {
  		return ""
  	}
  
  	// a heading must start with an uppercase letter
  	r, _ := utf8.DecodeRuneInString(line)
  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
  		return ""
  	}
  
  	// it must end in a letter or digit:
  	r, _ = utf8.DecodeLastRuneInString(line)
  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
  		return ""
  	}
  
  	// exclude lines with illegal characters
  	if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
  		return ""
  	}
  
  	// allow "'" for possessive "'s" only
  	for b := line; ; {
  		i := strings.IndexRune(b, '\'')
  		if i < 0 {
  			break
  		}
  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
  			return "" // not followed by "s "
  		}
  		b = b[i+2:]
  	}
  
  	return line
  }
  
  // op identifies the kind of a block produced by blocks.
  type op int
  
  const (
  	opPara op = iota // paragraph: a span of unindented non-blank lines
  	opHead           // section heading
  	opPre            // preformatted text: a span of indented lines
  )
  
  // block is one unit of comment text together with its kind.
  type block struct {
  	op    op       // kind of block
  	lines []string // text lines making up the block
  }
  
  var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
  
  func anchorID(line string) string {
  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
  }
  
  // ToHTML converts comment text to formatted HTML.
  // The comment was prepared by DocReader,
  // so it is known not to have leading, trailing blank lines
  // nor to have trailing spaces at the end of lines.
  // The comment markers have already been removed.
  //
  // Each span of unindented non-blank lines is converted into
  // a single paragraph. There is one exception to the rule: a span that
  // consists of a single line, is followed by another paragraph span,
  // begins with a capital letter, and contains no punctuation
  // is formatted as a heading.
  //
  // A span of indented lines is converted into a <pre> block,
  // with the common indent prefix removed.
  //
  // URLs in the comment text are converted into links; if the URL also appears
  // in the words map, the link is taken from the map (if the corresponding map
  // value is the empty string, the URL is not converted into a link).
  //
  // Go identifiers that appear in the words map are italicized; if the corresponding
  // map value is not the empty string, it is considered a URL and the word is converted
  // into a link.
  func ToHTML(w io.Writer, text string, words map[string]string) {
  	for _, b := range blocks(text) {
  		switch b.op {
  		case opPara:
  			w.Write(html_p)
  			for _, line := range b.lines {
  				emphasize(w, line, words, true)
  			}
  			w.Write(html_endp)
  		case opHead:
  			w.Write(html_h)
  			id := ""
  			for _, line := range b.lines {
  				if id == "" {
  					id = anchorID(line)
  					w.Write([]byte(id))
  					w.Write(html_hq)
  				}
  				commentEscape(w, line, true)
  			}
  			if id == "" {
  				w.Write(html_hq)
  			}
  			w.Write(html_endh)
  		case opPre:
  			w.Write(html_pre)
  			for _, line := range b.lines {
  				emphasize(w, line, nil, false)
  			}
  			w.Write(html_endpre)
  		}
  	}
  }
  
  func blocks(text string) []block {
  	var (
  		out  []block
  		para []string
  
  		lastWasBlank   = false
  		lastWasHeading = false
  	)
  
  	close := func() {
  		if para != nil {
  			out = append(out, block{opPara, para})
  			para = nil
  		}
  	}
  
  	lines := strings.SplitAfter(text, "\n")
  	unindent(lines)
  	for i := 0; i < len(lines); {
  		line := lines[i]
  		if isBlank(line) {
  			// close paragraph
  			close()
  			i++
  			lastWasBlank = true
  			continue
  		}
  		if indentLen(line) > 0 {
  			// close paragraph
  			close()
  
  			// count indented or blank lines
  			j := i + 1
  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
  				j++
  			}
  			// but not trailing blank lines
  			for j > i && isBlank(lines[j-1]) {
  				j--
  			}
  			pre := lines[i:j]
  			i = j
  
  			unindent(pre)
  
  			// put those lines in a pre block
  			out = append(out, block{opPre, pre})
  			lastWasHeading = false
  			continue
  		}
  
  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
  			// current line is non-blank, surrounded by blank lines
  			// and the next non-blank line is not indented: this
  			// might be a heading.
  			if head := heading(line); head != "" {
  				close()
  				out = append(out, block{opHead, []string{head}})
  				i += 2
  				lastWasHeading = true
  				continue
  			}
  		}
  
  		// open paragraph
  		lastWasBlank = false
  		lastWasHeading = false
  		para = append(para, lines[i])
  		i++
  	}
  	close()
  
  	return out
  }
  
  // ToText prepares comment text for presentation in textual output.
  // It wraps paragraphs of text to width or fewer Unicode code points
  // and then prefixes each line with the indent. In preformatted sections
  // (such as program text), it prefixes each non-blank line with preIndent.
  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
  	l := lineWrapper{
  		out:    w,
  		width:  width,
  		indent: indent,
  	}
  	for _, b := range blocks(text) {
  		switch b.op {
  		case opPara:
  			// l.write will add leading newline if required
  			for _, line := range b.lines {
  				l.write(line)
  			}
  			l.flush()
  		case opHead:
  			w.Write(nl)
  			for _, line := range b.lines {
  				l.write(line + "\n")
  			}
  			l.flush()
  		case opPre:
  			w.Write(nl)
  			for _, line := range b.lines {
  				if isBlank(line) {
  					w.Write([]byte("\n"))
  				} else {
  					w.Write([]byte(preIndent))
  					w.Write([]byte(line))
  				}
  			}
  		}
  	}
  }
  
  type lineWrapper struct {
  	out       io.Writer
  	printed   bool
  	width     int
  	indent    string
  	n         int
  	pendSpace int
  }
  
  var nl = []byte("\n")
  var space = []byte(" ")
  
  func (l *lineWrapper) write(text string) {
  	if l.n == 0 && l.printed {
  		l.out.Write(nl) // blank line before new paragraph
  	}
  	l.printed = true
  
  	for _, f := range strings.Fields(text) {
  		w := utf8.RuneCountInString(f)
  		// wrap if line is too long
  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
  			l.out.Write(nl)
  			l.n = 0
  			l.pendSpace = 0
  		}
  		if l.n == 0 {
  			l.out.Write([]byte(l.indent))
  		}
  		l.out.Write(space[:l.pendSpace])
  		l.out.Write([]byte(f))
  		l.n += l.pendSpace + w
  		l.pendSpace = 1
  	}
  }
  
  func (l *lineWrapper) flush() {
  	if l.n == 0 {
  		return
  	}
  	l.out.Write(nl)
  	l.pendSpace = 0
  	l.n = 0
  }
  

View as plain text