The Go Programming Language

Source file src/pkg/go/doc/comment.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Godoc comment extraction and comment -> HTML formatting.
     6	
     7	package doc
     8	
     9	import (
    10		"go/ast"
    11		"io"
    12		"regexp"
    13		"strings"
    14		"template" // for HTMLEscape
    15	)
    16	
    17	func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
    18	
    19	func stripTrailingWhitespace(s string) string {
    20		i := len(s)
    21		for i > 0 && isWhitespace(s[i-1]) {
    22			i--
    23		}
    24		return s[0:i]
    25	}
    26	
    27	// CommentText returns the text of comment,
    28	// with the comment markers - //, /*, and */ - removed.
    29	func CommentText(comment *ast.CommentGroup) string {
    30		if comment == nil {
    31			return ""
    32		}
    33		comments := make([]string, len(comment.List))
    34		for i, c := range comment.List {
    35			comments[i] = string(c.Text)
    36		}
    37	
    38		lines := make([]string, 0, 10) // most comments are less than 10 lines
    39		for _, c := range comments {
    40			// Remove comment markers.
    41			// The parser has given us exactly the comment text.
    42			switch c[1] {
    43			case '/':
    44				//-style comment
    45				c = c[2:]
    46				// Remove leading space after //, if there is one.
    47				// TODO(gri) This appears to be necessary in isolated
    48				//           cases (bignum.RatFromString) - why?
    49				if len(c) > 0 && c[0] == ' ' {
    50					c = c[1:]
    51				}
    52			case '*':
    53				/*-style comment */
    54				c = c[2 : len(c)-2]
    55			}
    56	
    57			// Split on newlines.
    58			cl := strings.Split(c, "\n")
    59	
    60			// Walk lines, stripping trailing white space and adding to list.
    61			for _, l := range cl {
    62				lines = append(lines, stripTrailingWhitespace(l))
    63			}
    64		}
    65	
    66		// Remove leading blank lines; convert runs of
    67		// interior blank lines to a single blank line.
    68		n := 0
    69		for _, line := range lines {
    70			if line != "" || n > 0 && lines[n-1] != "" {
    71				lines[n] = line
    72				n++
    73			}
    74		}
    75		lines = lines[0:n]
    76	
    77		// Add final "" entry to get trailing newline from Join.
    78		if n > 0 && lines[n-1] != "" {
    79			lines = append(lines, "")
    80		}
    81	
    82		return strings.Join(lines, "\n")
    83	}
    84	
    85	// Split bytes into lines.
    86	func split(text []byte) [][]byte {
    87		// count lines
    88		n := 0
    89		last := 0
    90		for i, c := range text {
    91			if c == '\n' {
    92				last = i + 1
    93				n++
    94			}
    95		}
    96		if last < len(text) {
    97			n++
    98		}
    99	
   100		// split
   101		out := make([][]byte, n)
   102		last = 0
   103		n = 0
   104		for i, c := range text {
   105			if c == '\n' {
   106				out[n] = text[last : i+1]
   107				last = i + 1
   108				n++
   109			}
   110		}
   111		if last < len(text) {
   112			out[n] = text[last:]
   113		}
   114	
   115		return out
   116	}
   117	
   118	var (
   119		ldquo = []byte("&ldquo;")
   120		rdquo = []byte("&rdquo;")
   121	)
   122	
   123	// Escape comment text for HTML. If nice is set,
   124	// also turn `` into &ldquo; and '' into &rdquo;.
   125	func commentEscape(w io.Writer, s []byte, nice bool) {
   126		last := 0
   127		if nice {
   128			for i := 0; i < len(s)-1; i++ {
   129				ch := s[i]
   130				if ch == s[i+1] && (ch == '`' || ch == '\'') {
   131					template.HTMLEscape(w, s[last:i])
   132					last = i + 2
   133					switch ch {
   134					case '`':
   135						w.Write(ldquo)
   136					case '\'':
   137						w.Write(rdquo)
   138					}
   139					i++ // loop will add one more
   140				}
   141			}
   142		}
   143		template.HTMLEscape(w, s[last:])
   144	}
   145	
   146	const (
   147		// Regexp for Go identifiers
   148		identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
   149	
   150		// Regexp for URLs
   151		protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
   152		hostPart = `[a-zA-Z0-9_@\-]+`
   153		filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
   154		urlRx    = protocol + `//` + // http://
   155			hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
   156			filePart + `([:.,]` + filePart + `)*`
   157	)
   158	
   159	var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
   160	
   161	var (
   162		html_a      = []byte(`<a href="`)
   163		html_aq     = []byte(`">`)
   164		html_enda   = []byte("</a>")
   165		html_i      = []byte("<i>")
   166		html_endi   = []byte("</i>")
   167		html_p      = []byte("<p>\n")
   168		html_endp   = []byte("</p>\n")
   169		html_pre    = []byte("<pre>")
   170		html_endpre = []byte("</pre>\n")
   171	)
   172	
   173	// Emphasize and escape a line of text for HTML. URLs are converted into links;
   174	// if the URL also appears in the words map, the link is taken from the map (if
   175	// the corresponding map value is the empty string, the URL is not converted
   176	// into a link). Go identifiers that appear in the words map are italicized; if
   177	// the corresponding map value is not the empty string, it is considered a URL
   178	// and the word is converted into a link. If nice is set, the remaining text's
   179	// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
   180	// and '' into &rdquo;).
   181	func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
   182		for {
   183			m := matchRx.FindSubmatchIndex(line)
   184			if m == nil {
   185				break
   186			}
   187			// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
   188	
   189			// write text before match
   190			commentEscape(w, line[0:m[0]], nice)
   191	
   192			// analyze match
   193			match := line[m[0]:m[1]]
   194			url := ""
   195			italics := false
   196			if words != nil {
   197				url, italics = words[string(match)]
   198			}
   199			if m[2] < 0 {
   200				// didn't match against first parenthesized sub-regexp; must be match against urlRx
   201				if !italics {
   202					// no alternative URL in words list, use match instead
   203					url = string(match)
   204				}
   205				italics = false // don't italicize URLs
   206			}
   207	
   208			// write match
   209			if len(url) > 0 {
   210				w.Write(html_a)
   211				template.HTMLEscape(w, []byte(url))
   212				w.Write(html_aq)
   213			}
   214			if italics {
   215				w.Write(html_i)
   216			}
   217			commentEscape(w, match, nice)
   218			if italics {
   219				w.Write(html_endi)
   220			}
   221			if len(url) > 0 {
   222				w.Write(html_enda)
   223			}
   224	
   225			// advance
   226			line = line[m[1]:]
   227		}
   228		commentEscape(w, line, nice)
   229	}
   230	
   231	func indentLen(s []byte) int {
   232		i := 0
   233		for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   234			i++
   235		}
   236		return i
   237	}
   238	
   239	func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') }
   240	
   241	func commonPrefix(a, b []byte) []byte {
   242		i := 0
   243		for i < len(a) && i < len(b) && a[i] == b[i] {
   244			i++
   245		}
   246		return a[0:i]
   247	}
   248	
   249	func unindent(block [][]byte) {
   250		if len(block) == 0 {
   251			return
   252		}
   253	
   254		// compute maximum common white prefix
   255		prefix := block[0][0:indentLen(block[0])]
   256		for _, line := range block {
   257			if !isBlank(line) {
   258				prefix = commonPrefix(prefix, line[0:indentLen(line)])
   259			}
   260		}
   261		n := len(prefix)
   262	
   263		// remove
   264		for i, line := range block {
   265			if !isBlank(line) {
   266				block[i] = line[n:]
   267			}
   268		}
   269	}
   270	
   271	// Convert comment text to formatted HTML.
   272	// The comment was prepared by DocReader,
   273	// so it is known not to have leading, trailing blank lines
   274	// nor to have trailing spaces at the end of lines.
   275	// The comment markers have already been removed.
   276	//
   277	// Turn each run of multiple \n into </p><p>.
   278	// Turn each run of indented lines into a <pre> block without indent.
   279	//
   280	// URLs in the comment text are converted into links; if the URL also appears
   281	// in the words map, the link is taken from the map (if the corresponding map
   282	// value is the empty string, the URL is not converted into a link).
   283	//
   284	// Go identifiers that appear in the words map are italicized; if the corresponding
   285	// map value is not the empty string, it is considered a URL and the word is converted
   286	// into a link.
   287	func ToHTML(w io.Writer, s []byte, words map[string]string) {
   288		inpara := false
   289	
   290		close := func() {
   291			if inpara {
   292				w.Write(html_endp)
   293				inpara = false
   294			}
   295		}
   296		open := func() {
   297			if !inpara {
   298				w.Write(html_p)
   299				inpara = true
   300			}
   301		}
   302	
   303		lines := split(s)
   304		unindent(lines)
   305		for i := 0; i < len(lines); {
   306			line := lines[i]
   307			if isBlank(line) {
   308				// close paragraph
   309				close()
   310				i++
   311				continue
   312			}
   313			if indentLen(line) > 0 {
   314				// close paragraph
   315				close()
   316	
   317				// count indented or blank lines
   318				j := i + 1
   319				for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   320					j++
   321				}
   322				// but not trailing blank lines
   323				for j > i && isBlank(lines[j-1]) {
   324					j--
   325				}
   326				block := lines[i:j]
   327				i = j
   328	
   329				unindent(block)
   330	
   331				// put those lines in a pre block
   332				w.Write(html_pre)
   333				for _, line := range block {
   334					emphasize(w, line, nil, false) // no nice text formatting
   335				}
   336				w.Write(html_endpre)
   337				continue
   338			}
   339			// open paragraph
   340			open()
   341			emphasize(w, lines[i], words, true) // nice text formatting
   342			i++
   343		}
   344		close()
   345	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.