Source file src/cmd/go/internal/get/vcs.go

Documentation: cmd/go/internal/get

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package get
     6  
     7  import (
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"internal/lazyregexp"
    12  	"internal/singleflight"
    13  	"log"
    14  	urlpkg "net/url"
    15  	"os"
    16  	"os/exec"
    17  	"path/filepath"
    18  	"regexp"
    19  	"strings"
    20  	"sync"
    21  
    22  	"cmd/go/internal/base"
    23  	"cmd/go/internal/cfg"
    24  	"cmd/go/internal/load"
    25  	"cmd/go/internal/web"
    26  )
    27  
// A vcsCmd describes how to use a version control system
// like Mercurial, Git, or Subversion.
//
// The command strings stored here are templates. run1 splits each one on
// whitespace and only then expands {key} placeholders such as {repo},
// {dir}, {tag} and {scheme} inside the individual arguments.
type vcsCmd struct {
	name string // human-readable name; returned by String
	cmd  string // name of binary to invoke command

	createCmd   []string // commands to download a fresh copy of a repository
	downloadCmd []string // commands to download updates into an existing repository

	tagCmd         []tagCmd // commands to list tags
	tagLookupCmd   []tagCmd // commands to lookup tags before running tagSyncCmd
	tagSyncCmd     []string // commands to sync to specific tag
	tagSyncDefault []string // commands to sync to default tag

	scheme  []string // URL schemes to try, in order; the first one is the default
	pingCmd string   // command run by ping to test a scheme; empty if schemes cannot be tested

	// remoteRepo reports the remote repository location recorded in the
	// checkout at rootDir. resolveRepo maps a remote location to the
	// location the VCS itself reports for it; in this file only Bazaar
	// sets it, so callers presumably must tolerate nil — confirm at the
	// call sites.
	remoteRepo  func(v *vcsCmd, rootDir string) (remoteRepo string, err error)
	resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error)
}
    48  
// defaultSecureScheme is the set of URL schemes that isSecureScheme treats
// as secure when no VCS-specific rule applies. For Git, a non-empty
// GIT_ALLOW_PROTOCOL environment variable takes precedence over this table.
// Schemes that are absent from the map are considered insecure.
var defaultSecureScheme = map[string]bool{
	"https":   true,
	"git+ssh": true,
	"bzr+ssh": true,
	"svn+ssh": true,
	"ssh":     true,
}
    56  
    57  func (v *vcsCmd) isSecure(repo string) bool {
    58  	u, err := urlpkg.Parse(repo)
    59  	if err != nil {
    60  		// If repo is not a URL, it's not secure.
    61  		return false
    62  	}
    63  	return v.isSecureScheme(u.Scheme)
    64  }
    65  
    66  func (v *vcsCmd) isSecureScheme(scheme string) bool {
    67  	switch v.cmd {
    68  	case "git":
    69  		// GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a
    70  		// colon-separated list of schemes that are allowed to be used with git
    71  		// fetch/clone. Any scheme not mentioned will be considered insecure.
    72  		if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" {
    73  			for _, s := range strings.Split(allow, ":") {
    74  				if s == scheme {
    75  					return true
    76  				}
    77  			}
    78  			return false
    79  		}
    80  	}
    81  	return defaultSecureScheme[scheme]
    82  }
    83  
// A tagCmd describes a command to list available tags
// that can be passed to tagSyncCmd.
//
// The pattern is compiled with the flags `(?m-s)` prepended, and the first
// capture group of each match is taken as the tag name. Literals in this
// file use the positional form, so the field order must not change.
type tagCmd struct {
	cmd     string // command to list tags
	pattern string // regexp to extract tags from list
}
    90  
// vcsList lists the known version control systems.
// vcsByCmd, vcsFromDir and checkNestedVCS scan it in this order.
var vcsList = []*vcsCmd{
	vcsHg,
	vcsGit,
	vcsSvn,
	vcsBzr,
	vcsFossil,
}
    99  
   100  // vcsByCmd returns the version control system for the given
   101  // command name (hg, git, svn, bzr).
   102  func vcsByCmd(cmd string) *vcsCmd {
   103  	for _, vcs := range vcsList {
   104  		if vcs.cmd == cmd {
   105  			return vcs
   106  		}
   107  	}
   108  	return nil
   109  }
   110  
// vcsHg describes how to use Mercurial.
var vcsHg = &vcsCmd{
	name: "Mercurial",
	cmd:  "hg",

	// {repo} and {dir} are supplied by create; the "--" separator keeps
	// them from being parsed as options.
	createCmd:   []string{"clone -U -- {repo} {dir}"},
	downloadCmd: []string{"pull"},

	// We allow both tag and branch names as 'tags'
	// for selecting a version. This lets people have
	// a go.release.r60 branch and a go1 branch
	// and make changes in both, without constantly
	// editing .hgtags.
	tagCmd: []tagCmd{
		{"tags", `^(\S+)`},
		{"branches", `^(\S+)`},
	},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update default"},

	// Schemes are tried in this order by repoRootFromVCSPaths, using
	// pingCmd to test each one.
	scheme:     []string{"https", "http", "ssh"},
	pingCmd:    "identify -- {scheme}://{repo}",
	remoteRepo: hgRemoteRepo,
}
   135  
   136  func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) {
   137  	out, err := vcsHg.runOutput(rootDir, "paths default")
   138  	if err != nil {
   139  		return "", err
   140  	}
   141  	return strings.TrimSpace(string(out)), nil
   142  }
   143  
// vcsGit describes how to use Git.
var vcsGit = &vcsCmd{
	name: "Git",
	cmd:  "git",

	// The second create step starts with the run1 pseudo-command
	// -go-internal-cd, so the submodule update runs inside the fresh clone.
	createCmd:   []string{"clone -- {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"},
	downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"},

	tagCmd: []tagCmd{
		// tags/xxx matches a git tag named xxx
		// origin/xxx matches a git branch named xxx on the default remote repository
		{"show-ref", `(?:tags|origin)/(\S+)$`},
	},
	// tagSync replaces the requested tag with the full tags/... or
	// origin/... ref captured here before running tagSyncCmd.
	tagLookupCmd: []tagCmd{
		{"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`},
	},
	tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"},
	// both createCmd and downloadCmd update the working dir.
	// No need to do more here. We used to 'checkout master'
	// but that doesn't work if the default branch is not named master.
	// DO NOT add 'checkout master' here.
	// See golang.org/issue/9032.
	tagSyncDefault: []string{"submodule update --init --recursive"},

	scheme: []string{"git", "https", "http", "git+ssh", "ssh"},

	// Leave out the '--' separator in the ls-remote command: git 2.7.4 does not
	// support such a separator for that command, and this use should be safe
	// without it because the {scheme} value comes from the predefined list above.
	// See golang.org/issue/33836.
	pingCmd: "ls-remote {scheme}://{repo}",

	remoteRepo: gitRemoteRepo,
}
   178  
// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH, such as "git@github.com:user/repo".
// Its three capture groups are the user, the host and the path;
// gitRemoteRepo uses them to build the equivalent ssh:// URL.
var scpSyntaxRe = lazyregexp.New(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
   182  
   183  func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) {
   184  	cmd := "config remote.origin.url"
   185  	errParse := errors.New("unable to parse output of git " + cmd)
   186  	errRemoteOriginNotFound := errors.New("remote origin not found")
   187  	outb, err := vcsGit.run1(rootDir, cmd, nil, false)
   188  	if err != nil {
   189  		// if it doesn't output any message, it means the config argument is correct,
   190  		// but the config value itself doesn't exist
   191  		if outb != nil && len(outb) == 0 {
   192  			return "", errRemoteOriginNotFound
   193  		}
   194  		return "", err
   195  	}
   196  	out := strings.TrimSpace(string(outb))
   197  
   198  	var repoURL *urlpkg.URL
   199  	if m := scpSyntaxRe.FindStringSubmatch(out); m != nil {
   200  		// Match SCP-like syntax and convert it to a URL.
   201  		// Eg, "git@github.com:user/repo" becomes
   202  		// "ssh://git@github.com/user/repo".
   203  		repoURL = &urlpkg.URL{
   204  			Scheme: "ssh",
   205  			User:   urlpkg.User(m[1]),
   206  			Host:   m[2],
   207  			Path:   m[3],
   208  		}
   209  	} else {
   210  		repoURL, err = urlpkg.Parse(out)
   211  		if err != nil {
   212  			return "", err
   213  		}
   214  	}
   215  
   216  	// Iterate over insecure schemes too, because this function simply
   217  	// reports the state of the repo. If we can't see insecure schemes then
   218  	// we can't report the actual repo URL.
   219  	for _, s := range vcsGit.scheme {
   220  		if repoURL.Scheme == s {
   221  			return repoURL.String(), nil
   222  		}
   223  	}
   224  	return "", errParse
   225  }
   226  
// vcsBzr describes how to use Bazaar.
// It is the only VCS in this file that sets resolveRepo.
var vcsBzr = &vcsCmd{
	name: "Bazaar",
	cmd:  "bzr",

	createCmd: []string{"branch -- {repo} {dir}"},

	// Without --overwrite bzr will not pull tags that changed.
	// Replace by --overwrite-tags after http://pad.lv/681792 goes in.
	downloadCmd: []string{"pull --overwrite"},

	tagCmd:         []tagCmd{{"tags", `^(\S+)`}},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update -r revno:-1"},

	// Schemes are tried in this order by repoRootFromVCSPaths, using
	// pingCmd to test each one.
	scheme:      []string{"https", "http", "bzr", "bzr+ssh"},
	pingCmd:     "info -- {scheme}://{repo}",
	remoteRepo:  bzrRemoteRepo,
	resolveRepo: bzrResolveRepo,
}
   247  
   248  func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) {
   249  	outb, err := vcsBzr.runOutput(rootDir, "config parent_location")
   250  	if err != nil {
   251  		return "", err
   252  	}
   253  	return strings.TrimSpace(string(outb)), nil
   254  }
   255  
   256  func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) {
   257  	outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo)
   258  	if err != nil {
   259  		return "", err
   260  	}
   261  	out := string(outb)
   262  
   263  	// Expect:
   264  	// ...
   265  	//   (branch root|repository branch): <URL>
   266  	// ...
   267  
   268  	found := false
   269  	for _, prefix := range []string{"\n  branch root: ", "\n  repository branch: "} {
   270  		i := strings.Index(out, prefix)
   271  		if i >= 0 {
   272  			out = out[i+len(prefix):]
   273  			found = true
   274  			break
   275  		}
   276  	}
   277  	if !found {
   278  		return "", fmt.Errorf("unable to parse output of bzr info")
   279  	}
   280  
   281  	i := strings.Index(out, "\n")
   282  	if i < 0 {
   283  		return "", fmt.Errorf("unable to parse output of bzr info")
   284  	}
   285  	out = out[:i]
   286  	return strings.TrimSpace(out), nil
   287  }
   288  
// vcsSvn describes how to use Subversion.
var vcsSvn = &vcsCmd{
	name: "Subversion",
	cmd:  "svn",

	createCmd:   []string{"checkout -- {repo} {dir}"},
	downloadCmd: []string{"update"},

	// There is no tag command in subversion.
	// The branch information is all in the path names.
	// With tagSyncCmd left nil, tagSync is a no-op for Subversion.

	scheme:     []string{"https", "http", "svn", "svn+ssh"},
	pingCmd:    "info -- {scheme}://{repo}",
	remoteRepo: svnRemoteRepo,
}
   304  
   305  func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) {
   306  	outb, err := vcsSvn.runOutput(rootDir, "info")
   307  	if err != nil {
   308  		return "", err
   309  	}
   310  	out := string(outb)
   311  
   312  	// Expect:
   313  	//
   314  	//	 ...
   315  	// 	URL: <URL>
   316  	// 	...
   317  	//
   318  	// Note that we're not using the Repository Root line,
   319  	// because svn allows checking out subtrees.
   320  	// The URL will be the URL of the subtree (what we used with 'svn co')
   321  	// while the Repository Root may be a much higher parent.
   322  	i := strings.Index(out, "\nURL: ")
   323  	if i < 0 {
   324  		return "", fmt.Errorf("unable to parse output of svn info")
   325  	}
   326  	out = out[i+len("\nURL: "):]
   327  	i = strings.Index(out, "\n")
   328  	if i < 0 {
   329  		return "", fmt.Errorf("unable to parse output of svn info")
   330  	}
   331  	out = out[:i]
   332  	return strings.TrimSpace(out), nil
   333  }
   334  
// fossilRepoName is the name go get associates with a fossil repository. In the
// real world the file can be named anything.
//
// vcsFossil.createCmd clones into a file of this name inside the checkout.
// The value equals "." + vcsFossil.cmd, which is the entry vcsFromDir and
// checkNestedVCS look for when detecting a checkout.
const fossilRepoName = ".fossil"
   338  
   339  // vcsFossil describes how to use Fossil (fossil-scm.org)
   340  var vcsFossil = &vcsCmd{
   341  	name: "Fossil",
   342  	cmd:  "fossil",
   343  
   344  	createCmd:   []string{"-go-internal-mkdir {dir} clone -- {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"},
   345  	downloadCmd: []string{"up"},
   346  
   347  	tagCmd:         []tagCmd{{"tag ls", `(.*)`}},
   348  	tagSyncCmd:     []string{"up tag:{tag}"},
   349  	tagSyncDefault: []string{"up trunk"},
   350  
   351  	scheme:     []string{"https", "http"},
   352  	remoteRepo: fossilRemoteRepo,
   353  }
   354  
   355  func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) {
   356  	out, err := vcsFossil.runOutput(rootDir, "remote-url")
   357  	if err != nil {
   358  		return "", err
   359  	}
   360  	return strings.TrimSpace(string(out)), nil
   361  }
   362  
// String returns the human-readable name of the version control system
// (for example "Git" or "Mercurial"), not the name of its binary.
func (v *vcsCmd) String() string {
	return v.name
}
   366  
// run runs the command line cmd in the given directory.
// keyval is a list of key, value pairs. run expands
// instances of {key} in cmd into value, but only after
// splitting cmd into individual arguments.
// If an error occurs, run prints the command line and the
// command's standard error output (or, if there is none, the
// error text) to standard error.
// Otherwise run discards the command's output.
func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error {
	// verbose=true: always report failures, not only in verbose mode.
	_, err := v.run1(dir, cmd, keyval, true)
	return err
}
   378  
// runVerboseOnly is like run but only generates error output to standard error in verbose mode.
// It is used by ping, where a failing command is an expected outcome.
func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error {
	// verbose=false: run1 reports a failure only when cfg.BuildV is set.
	_, err := v.run1(dir, cmd, keyval, false)
	return err
}
   384  
// runOutput is like run but returns the output of the command.
// The returned bytes are the command's standard output as captured by
// run1; failures are reported on standard error exactly as for run.
func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) {
	return v.run1(dir, cmd, keyval, true)
}
   389  
   390  // run1 is the generalized implementation of run and runOutput.
   391  func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) {
   392  	m := make(map[string]string)
   393  	for i := 0; i < len(keyval); i += 2 {
   394  		m[keyval[i]] = keyval[i+1]
   395  	}
   396  	args := strings.Fields(cmdline)
   397  	for i, arg := range args {
   398  		args[i] = expand(m, arg)
   399  	}
   400  
   401  	if len(args) >= 2 && args[0] == "-go-internal-mkdir" {
   402  		var err error
   403  		if filepath.IsAbs(args[1]) {
   404  			err = os.Mkdir(args[1], os.ModePerm)
   405  		} else {
   406  			err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm)
   407  		}
   408  		if err != nil {
   409  			return nil, err
   410  		}
   411  		args = args[2:]
   412  	}
   413  
   414  	if len(args) >= 2 && args[0] == "-go-internal-cd" {
   415  		if filepath.IsAbs(args[1]) {
   416  			dir = args[1]
   417  		} else {
   418  			dir = filepath.Join(dir, args[1])
   419  		}
   420  		args = args[2:]
   421  	}
   422  
   423  	_, err := exec.LookPath(v.cmd)
   424  	if err != nil {
   425  		fmt.Fprintf(os.Stderr,
   426  			"go: missing %s command. See https://golang.org/s/gogetcmd\n",
   427  			v.name)
   428  		return nil, err
   429  	}
   430  
   431  	cmd := exec.Command(v.cmd, args...)
   432  	cmd.Dir = dir
   433  	cmd.Env = base.EnvForDir(cmd.Dir, os.Environ())
   434  	if cfg.BuildX {
   435  		fmt.Fprintf(os.Stderr, "cd %s\n", dir)
   436  		fmt.Fprintf(os.Stderr, "%s %s\n", v.cmd, strings.Join(args, " "))
   437  	}
   438  	out, err := cmd.Output()
   439  	if err != nil {
   440  		if verbose || cfg.BuildV {
   441  			fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " "))
   442  			if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 {
   443  				os.Stderr.Write(ee.Stderr)
   444  			} else {
   445  				fmt.Fprintf(os.Stderr, err.Error())
   446  			}
   447  		}
   448  	}
   449  	return out, err
   450  }
   451  
// ping pings to determine scheme to use.
// It runs pingCmd from the current directory with {scheme} and {repo}
// expanded and returns nil if the command succeeds. Because it goes
// through runVerboseOnly, a failed ping is reported on standard error
// only in verbose mode.
func (v *vcsCmd) ping(scheme, repo string) error {
	return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo)
}
   456  
   457  // create creates a new copy of repo in dir.
   458  // The parent of dir must exist; dir must not.
   459  func (v *vcsCmd) create(dir, repo string) error {
   460  	for _, cmd := range v.createCmd {
   461  		if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil {
   462  			return err
   463  		}
   464  	}
   465  	return nil
   466  }
   467  
   468  // download downloads any new changes for the repo in dir.
   469  func (v *vcsCmd) download(dir string) error {
   470  	for _, cmd := range v.downloadCmd {
   471  		if err := v.run(dir, cmd); err != nil {
   472  			return err
   473  		}
   474  	}
   475  	return nil
   476  }
   477  
   478  // tags returns the list of available tags for the repo in dir.
   479  func (v *vcsCmd) tags(dir string) ([]string, error) {
   480  	var tags []string
   481  	for _, tc := range v.tagCmd {
   482  		out, err := v.runOutput(dir, tc.cmd)
   483  		if err != nil {
   484  			return nil, err
   485  		}
   486  		re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   487  		for _, m := range re.FindAllStringSubmatch(string(out), -1) {
   488  			tags = append(tags, m[1])
   489  		}
   490  	}
   491  	return tags, nil
   492  }
   493  
   494  // tagSync syncs the repo in dir to the named tag,
   495  // which either is a tag returned by tags or is v.tagDefault.
   496  func (v *vcsCmd) tagSync(dir, tag string) error {
   497  	if v.tagSyncCmd == nil {
   498  		return nil
   499  	}
   500  	if tag != "" {
   501  		for _, tc := range v.tagLookupCmd {
   502  			out, err := v.runOutput(dir, tc.cmd, "tag", tag)
   503  			if err != nil {
   504  				return err
   505  			}
   506  			re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   507  			m := re.FindStringSubmatch(string(out))
   508  			if len(m) > 1 {
   509  				tag = m[1]
   510  				break
   511  			}
   512  		}
   513  	}
   514  
   515  	if tag == "" && v.tagSyncDefault != nil {
   516  		for _, cmd := range v.tagSyncDefault {
   517  			if err := v.run(dir, cmd); err != nil {
   518  				return err
   519  			}
   520  		}
   521  		return nil
   522  	}
   523  
   524  	for _, cmd := range v.tagSyncCmd {
   525  		if err := v.run(dir, cmd, "tag", tag); err != nil {
   526  			return err
   527  		}
   528  	}
   529  	return nil
   530  }
   531  
// A vcsPath describes how to convert an import path into a
// version control system and repository name.
//
// repoRootFromVCSPaths applies an entry only to import paths that start
// with prefix. The named subexpressions of regexp, together with "prefix"
// and "import", form the match map used to expand repo and vcs; the "root"
// entry of that map becomes RepoRoot.Root. If the regexp does not match,
// an entry with a non-empty prefix produces an error, while an entry with
// an empty prefix is skipped.
type vcsPath struct {
	prefix         string                              // prefix this description applies to
	regexp         *lazyregexp.Regexp                  // compiled pattern for import path
	repo           string                              // repository to use (expand with match of re)
	vcs            string                              // version control system to use (expand with match of re)
	check          func(match map[string]string) error // additional checks
	schemelessRepo bool                                // if true, the repo pattern lacks a scheme
}
   542  
   543  // vcsFromDir inspects dir and its parents to determine the
   544  // version control system and code repository to use.
   545  // On return, root is the import path
   546  // corresponding to the root of the repository.
   547  func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) {
   548  	// Clean and double-check that dir is in (a subdirectory of) srcRoot.
   549  	dir = filepath.Clean(dir)
   550  	srcRoot = filepath.Clean(srcRoot)
   551  	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   552  		return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   553  	}
   554  
   555  	var vcsRet *vcsCmd
   556  	var rootRet string
   557  
   558  	origDir := dir
   559  	for len(dir) > len(srcRoot) {
   560  		for _, vcs := range vcsList {
   561  			if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil {
   562  				root := filepath.ToSlash(dir[len(srcRoot)+1:])
   563  				// Record first VCS we find, but keep looking,
   564  				// to detect mistakes like one kind of VCS inside another.
   565  				if vcsRet == nil {
   566  					vcsRet = vcs
   567  					rootRet = root
   568  					continue
   569  				}
   570  				// Allow .git inside .git, which can arise due to submodules.
   571  				if vcsRet == vcs && vcs.cmd == "git" {
   572  					continue
   573  				}
   574  				// Otherwise, we have one VCS inside a different VCS.
   575  				return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s",
   576  					filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd)
   577  			}
   578  		}
   579  
   580  		// Move to parent.
   581  		ndir := filepath.Dir(dir)
   582  		if len(ndir) >= len(dir) {
   583  			// Shouldn't happen, but just in case, stop.
   584  			break
   585  		}
   586  		dir = ndir
   587  	}
   588  
   589  	if vcsRet != nil {
   590  		return vcsRet, rootRet, nil
   591  	}
   592  
   593  	return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir)
   594  }
   595  
   596  // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS
   597  // situation for dir, checking parents up until srcRoot.
   598  func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error {
   599  	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   600  		return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   601  	}
   602  
   603  	otherDir := dir
   604  	for len(otherDir) > len(srcRoot) {
   605  		for _, otherVCS := range vcsList {
   606  			if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil {
   607  				// Allow expected vcs in original dir.
   608  				if otherDir == dir && otherVCS == vcs {
   609  					continue
   610  				}
   611  				// Allow .git inside .git, which can arise due to submodules.
   612  				if otherVCS == vcs && vcs.cmd == "git" {
   613  					continue
   614  				}
   615  				// Otherwise, we have one VCS inside a different VCS.
   616  				return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd)
   617  			}
   618  		}
   619  		// Move to parent.
   620  		newDir := filepath.Dir(otherDir)
   621  		if len(newDir) >= len(otherDir) {
   622  			// Shouldn't happen, but just in case, stop.
   623  			break
   624  		}
   625  		otherDir = newDir
   626  	}
   627  
   628  	return nil
   629  }
   630  
// RepoRoot describes the repository root for a tree of source code.
//
// Values are produced by repoRootFromVCSPaths (hard-coded patterns) and
// repoRootForImportDynamic (go-import <meta> tags).
type RepoRoot struct {
	Repo     string // repository URL, including scheme
	Root     string // import path corresponding to root of repo
	IsCustom bool   // defined by served <meta> tags (as opposed to hard-coded pattern)
	VCS      string // vcs type ("mod", "git", ...)

	// vcs is nil when VCS is "mod", which has no entry in vcsList.
	vcs *vcsCmd // internal: vcs command access
}
   640  
   641  func httpPrefix(s string) string {
   642  	for _, prefix := range [...]string{"http:", "https:"} {
   643  		if strings.HasPrefix(s, prefix) {
   644  			return prefix
   645  		}
   646  	}
   647  	return ""
   648  }
   649  
// ModuleMode specifies whether to prefer modules when looking up code sources.
// It is handed through to parseMetaGoImports when go-import meta tags
// are read.
type ModuleMode int

const (
	// IgnoreMod is the zero value.
	// NOTE(review): presumably "mod" entries get no preference in this
	// mode — confirm against parseMetaGoImports.
	IgnoreMod ModuleMode = iota
	// NOTE(review): presumably "mod" entries are preferred in this
	// mode — confirm against parseMetaGoImports.
	PreferMod
)
   657  
   658  // RepoRootForImportPath analyzes importPath to determine the
   659  // version control system, and code repository to use.
   660  func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
   661  	rr, err := repoRootFromVCSPaths(importPath, security, vcsPaths)
   662  	if err == errUnknownSite {
   663  		rr, err = repoRootForImportDynamic(importPath, mod, security)
   664  		if err != nil {
   665  			err = load.ImportErrorf(importPath, "unrecognized import path %q: %v", importPath, err)
   666  		}
   667  	}
   668  	if err != nil {
   669  		rr1, err1 := repoRootFromVCSPaths(importPath, security, vcsPathsAfterDynamic)
   670  		if err1 == nil {
   671  			rr = rr1
   672  			err = nil
   673  		}
   674  	}
   675  
   676  	// Should have been taken care of above, but make sure.
   677  	if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") {
   678  		// Do not allow wildcards in the repo root.
   679  		rr = nil
   680  		err = load.ImportErrorf(importPath, "cannot expand ... in %q", importPath)
   681  	}
   682  	return rr, err
   683  }
   684  
// errUnknownSite is returned by repoRootFromVCSPaths when none of the
// given patterns applies to the import path. RepoRootForImportPath
// compares against it by identity to decide whether to fall back to
// dynamic discovery.
var errUnknownSite = errors.New("dynamic lookup required to find mapping")
   686  
   687  // repoRootFromVCSPaths attempts to map importPath to a repoRoot
   688  // using the mappings defined in vcsPaths.
   689  func repoRootFromVCSPaths(importPath string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) {
   690  	// A common error is to use https://packagepath because that's what
   691  	// hg and git require. Diagnose this helpfully.
   692  	if prefix := httpPrefix(importPath); prefix != "" {
   693  		// The importPath has been cleaned, so has only one slash. The pattern
   694  		// ignores the slashes; the error message puts them back on the RHS at least.
   695  		return nil, fmt.Errorf("%q not allowed in import path", prefix+"//")
   696  	}
   697  	for _, srv := range vcsPaths {
   698  		if !strings.HasPrefix(importPath, srv.prefix) {
   699  			continue
   700  		}
   701  		m := srv.regexp.FindStringSubmatch(importPath)
   702  		if m == nil {
   703  			if srv.prefix != "" {
   704  				return nil, load.ImportErrorf(importPath, "invalid %s import path %q", srv.prefix, importPath)
   705  			}
   706  			continue
   707  		}
   708  
   709  		// Build map of named subexpression matches for expand.
   710  		match := map[string]string{
   711  			"prefix": srv.prefix,
   712  			"import": importPath,
   713  		}
   714  		for i, name := range srv.regexp.SubexpNames() {
   715  			if name != "" && match[name] == "" {
   716  				match[name] = m[i]
   717  			}
   718  		}
   719  		if srv.vcs != "" {
   720  			match["vcs"] = expand(match, srv.vcs)
   721  		}
   722  		if srv.repo != "" {
   723  			match["repo"] = expand(match, srv.repo)
   724  		}
   725  		if srv.check != nil {
   726  			if err := srv.check(match); err != nil {
   727  				return nil, err
   728  			}
   729  		}
   730  		vcs := vcsByCmd(match["vcs"])
   731  		if vcs == nil {
   732  			return nil, fmt.Errorf("unknown version control system %q", match["vcs"])
   733  		}
   734  		var repoURL string
   735  		if !srv.schemelessRepo {
   736  			repoURL = match["repo"]
   737  		} else {
   738  			scheme := vcs.scheme[0] // default to first scheme
   739  			repo := match["repo"]
   740  			if vcs.pingCmd != "" {
   741  				// If we know how to test schemes, scan to find one.
   742  				for _, s := range vcs.scheme {
   743  					if security == web.SecureOnly && !vcs.isSecureScheme(s) {
   744  						continue
   745  					}
   746  					if vcs.ping(s, repo) == nil {
   747  						scheme = s
   748  						break
   749  					}
   750  				}
   751  			}
   752  			repoURL = scheme + "://" + repo
   753  		}
   754  		rr := &RepoRoot{
   755  			Repo: repoURL,
   756  			Root: match["root"],
   757  			VCS:  vcs.cmd,
   758  			vcs:  vcs,
   759  		}
   760  		return rr, nil
   761  	}
   762  	return nil, errUnknownSite
   763  }
   764  
   765  // urlForImportPath returns a partially-populated URL for the given Go import path.
   766  //
   767  // The URL leaves the Scheme field blank so that web.Get will try any scheme
   768  // allowed by the selected security mode.
   769  func urlForImportPath(importPath string) (*urlpkg.URL, error) {
   770  	slash := strings.Index(importPath, "/")
   771  	if slash < 0 {
   772  		slash = len(importPath)
   773  	}
   774  	host, path := importPath[:slash], importPath[slash:]
   775  	if !strings.Contains(host, ".") {
   776  		return nil, errors.New("import path does not begin with hostname")
   777  	}
   778  	if len(path) == 0 {
   779  		path = "/"
   780  	}
   781  	return &urlpkg.URL{Host: host, Path: path, RawQuery: "go-get=1"}, nil
   782  }
   783  
// repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not
// statically known by repoRootForImportPathStatic.
// (In this file the static lookup is performed by repoRootFromVCSPaths;
// RepoRootForImportPath calls this function when that lookup reports
// errUnknownSite.)
//
// This handles custom import paths like "name.tld/pkg/foo" or just "name.tld".
//
// It fetches the page for importPath with the go-get=1 query, parses its
// go-import meta tags and selects the one matching importPath. If that
// tag's prefix differs from importPath, the page for the prefix itself is
// fetched and must declare the identical tag. The declared repository root
// must be a URL with a scheme other than "file", and the declared VCS must
// be known or be "mod".
func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
	url, err := urlForImportPath(importPath)
	if err != nil {
		return nil, err
	}
	resp, err := web.Get(security, url)
	if err != nil {
		msg := "https fetch: %v"
		if security == web.Insecure {
			msg = "http/" + msg
		}
		return nil, fmt.Errorf(msg, err)
	}
	body := resp.Body
	defer body.Close()
	// The parse error is examined only after the response status, so that
	// a failed request is not reported as a parse problem.
	imports, err := parseMetaGoImports(body, mod)
	if len(imports) == 0 {
		if respErr := resp.Err(); respErr != nil {
			// If the server's status was not OK, prefer to report that instead of
			// an XML parse error.
			return nil, respErr
		}
	}
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", importPath, err)
	}
	// Find the matched meta import.
	mmi, err := matchGoImport(imports, importPath)
	if err != nil {
		if _, ok := err.(ImportMismatchError); !ok {
			return nil, fmt.Errorf("parse %s: %v", url, err)
		}
		return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", resp.URL, err)
	}
	if cfg.BuildV {
		log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, url)
	}
	// If the import was "uni.edu/bob/project", which said the
	// prefix was "uni.edu" and the RepoRoot was "evilroot.com",
	// make sure we don't trust Bob and check out evilroot.com to
	// "uni.edu" yet (possibly overwriting/preempting another
	// non-evil student). Instead, first verify the root and see
	// if it matches Bob's claim.
	if mmi.Prefix != importPath {
		if cfg.BuildV {
			log.Printf("get %q: verifying non-authoritative meta tag", importPath)
		}
		// imports is deliberately redeclared in this scope; url and err
		// are the outer variables and are overwritten.
		var imports []metaImport
		url, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security)
		if err != nil {
			return nil, err
		}
		// The prefix's own page must yield exactly the same meta import.
		metaImport2, err := matchGoImport(imports, importPath)
		if err != nil || mmi != metaImport2 {
			return nil, fmt.Errorf("%s and %s disagree about go-import for %s", resp.URL, url, mmi.Prefix)
		}
	}

	if err := validateRepoRoot(mmi.RepoRoot); err != nil {
		return nil, fmt.Errorf("%s: invalid repo root %q: %v", resp.URL, mmi.RepoRoot, err)
	}
	// "mod" has no vcsCmd; vcs stays nil in that case.
	vcs := vcsByCmd(mmi.VCS)
	if vcs == nil && mmi.VCS != "mod" {
		return nil, fmt.Errorf("%s: unknown vcs %q", resp.URL, mmi.VCS)
	}

	rr := &RepoRoot{
		Repo:     mmi.RepoRoot,
		Root:     mmi.Prefix,
		IsCustom: true,
		VCS:      mmi.VCS,
		vcs:      vcs,
	}
	return rr, nil
}
   863  
   864  // validateRepoRoot returns an error if repoRoot does not seem to be
   865  // a valid URL with scheme.
   866  func validateRepoRoot(repoRoot string) error {
   867  	url, err := urlpkg.Parse(repoRoot)
   868  	if err != nil {
   869  		return err
   870  	}
   871  	if url.Scheme == "" {
   872  		return errors.New("no scheme")
   873  	}
   874  	if url.Scheme == "file" {
   875  		return errors.New("file scheme disallowed")
   876  	}
   877  	return nil
   878  }
   879  
   880  var fetchGroup singleflight.Group
   881  var (
   882  	fetchCacheMu sync.Mutex
   883  	fetchCache   = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix
   884  )
   885  
   886  // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag
   887  // and returns its HTML discovery URL and the parsed metaImport lines
   888  // found on the page.
   889  //
   890  // The importPath is of the form "golang.org/x/tools".
   891  // It is an error if no imports are found.
   892  // url will still be valid if err != nil.
   893  // The returned url will be of the form "https://golang.org/x/tools?go-get=1"
   894  func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (*urlpkg.URL, []metaImport, error) {
   895  	setCache := func(res fetchResult) (fetchResult, error) {
   896  		fetchCacheMu.Lock()
   897  		defer fetchCacheMu.Unlock()
   898  		fetchCache[importPrefix] = res
   899  		return res, nil
   900  	}
   901  
   902  	resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) {
   903  		fetchCacheMu.Lock()
   904  		if res, ok := fetchCache[importPrefix]; ok {
   905  			fetchCacheMu.Unlock()
   906  			return res, nil
   907  		}
   908  		fetchCacheMu.Unlock()
   909  
   910  		url, err := urlForImportPath(importPrefix)
   911  		if err != nil {
   912  			return setCache(fetchResult{err: err})
   913  		}
   914  		resp, err := web.Get(security, url)
   915  		if err != nil {
   916  			return setCache(fetchResult{url: url, err: fmt.Errorf("fetching %s: %v", importPrefix, err)})
   917  		}
   918  		body := resp.Body
   919  		defer body.Close()
   920  		imports, err := parseMetaGoImports(body, mod)
   921  		if len(imports) == 0 {
   922  			if respErr := resp.Err(); respErr != nil {
   923  				// If the server's status was not OK, prefer to report that instead of
   924  				// an XML parse error.
   925  				return setCache(fetchResult{url: url, err: respErr})
   926  			}
   927  		}
   928  		if err != nil {
   929  			return setCache(fetchResult{url: url, err: fmt.Errorf("parsing %s: %v", resp.URL, err)})
   930  		}
   931  		if len(imports) == 0 {
   932  			err = fmt.Errorf("fetching %s: no go-import meta tag found in %s", importPrefix, resp.URL)
   933  		}
   934  		return setCache(fetchResult{url: url, imports: imports, err: err})
   935  	})
   936  	res := resi.(fetchResult)
   937  	return res.url, res.imports, res.err
   938  }
   939  
// fetchResult is the outcome of one metaImportsForPrefix lookup,
// in the form it is stored in fetchCache.
type fetchResult struct {
	// url is the discovery URL that was requested. It is left unset
	// when the URL could not be derived from the import prefix.
	url *urlpkg.URL
	// imports holds the go-import meta tags parsed from the response.
	imports []metaImport
	// err is the failure to report to callers, or nil on success.
	err error
}
   945  
   946  // metaImport represents the parsed <meta name="go-import"
   947  // content="prefix vcs reporoot" /> tags from HTML files.
   948  type metaImport struct {
   949  	Prefix, VCS, RepoRoot string
   950  }
   951  
   952  // pathPrefix reports whether sub is a prefix of s,
   953  // only considering entire path components.
   954  func pathPrefix(s, sub string) bool {
   955  	// strings.HasPrefix is necessary but not sufficient.
   956  	if !strings.HasPrefix(s, sub) {
   957  		return false
   958  	}
   959  	// The remainder after the prefix must either be empty or start with a slash.
   960  	rem := s[len(sub):]
   961  	return rem == "" || rem[0] == '/'
   962  }
   963  
   964  // A ImportMismatchError is returned where metaImport/s are present
   965  // but none match our import path.
   966  type ImportMismatchError struct {
   967  	importPath string
   968  	mismatches []string // the meta imports that were discarded for not matching our importPath
   969  }
   970  
   971  func (m ImportMismatchError) Error() string {
   972  	formattedStrings := make([]string, len(m.mismatches))
   973  	for i, pre := range m.mismatches {
   974  		formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath)
   975  	}
   976  	return strings.Join(formattedStrings, ", ")
   977  }
   978  
   979  // matchGoImport returns the metaImport from imports matching importPath.
   980  // An error is returned if there are multiple matches.
   981  // An ImportMismatchError is returned if none match.
   982  func matchGoImport(imports []metaImport, importPath string) (metaImport, error) {
   983  	match := -1
   984  
   985  	errImportMismatch := ImportMismatchError{importPath: importPath}
   986  	for i, im := range imports {
   987  		if !pathPrefix(importPath, im.Prefix) {
   988  			errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix)
   989  			continue
   990  		}
   991  
   992  		if match >= 0 {
   993  			if imports[match].VCS == "mod" && im.VCS != "mod" {
   994  				// All the mod entries precede all the non-mod entries.
   995  				// We have a mod entry and don't care about the rest,
   996  				// matching or not.
   997  				break
   998  			}
   999  			return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath)
  1000  		}
  1001  		match = i
  1002  	}
  1003  
  1004  	if match == -1 {
  1005  		return metaImport{}, errImportMismatch
  1006  	}
  1007  	return imports[match], nil
  1008  }
  1009  
  1010  // expand rewrites s to replace {k} with match[k] for each key k in match.
  1011  func expand(match map[string]string, s string) string {
  1012  	// We want to replace each match exactly once, and the result of expansion
  1013  	// must not depend on the iteration order through the map.
  1014  	// A strings.Replacer has exactly the properties we're looking for.
  1015  	oldNew := make([]string, 0, 2*len(match))
  1016  	for k, v := range match {
  1017  		oldNew = append(oldNew, "{"+k+"}", v)
  1018  	}
  1019  	return strings.NewReplacer(oldNew...).Replace(s)
  1020  }
  1021  
// vcsPaths defines the meaning of import paths referring to
// commonly-used VCS hosting sites (github.com/user/dir)
// and import paths referring to a fully-qualified importPath
// containing a VCS type (foo.com/repo.git/dir).
//
// Every pattern captures the part of the import path that names the
// repository in a group called "root"; the repo templates refer to it
// as {root}. The order of the entries matters: the general-purpose
// pattern must stay last.
var vcsPaths = []*vcsPath{
	// GitHub
	// Path components below the repository may contain any Unicode
	// letter (\p{L}), unlike the user and repository names.
	{
		prefix: "github.com/",
		regexp: lazyregexp.New(`^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`),
		vcs:    "git",
		repo:   "https://{root}",
		check:  noVCSSuffix,
	},

	// Bitbucket
	// No vcs is fixed here: bitbucketVCS determines it and stores it in
	// the match, using the "bitname" group to query the Bitbucket API.
	{
		prefix: "bitbucket.org/",
		regexp: lazyregexp.New(`^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
		repo:   "https://{root}",
		check:  bitbucketVCS,
	},

	// IBM DevOps Services (JazzHub)
	{
		prefix: "hub.jazz.net/git/",
		regexp: lazyregexp.New(`^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`),
		vcs:    "git",
		repo:   "https://{root}",
		check:  noVCSSuffix,
	},

	// Git at Apache
	// The repository name must end in ".git", so there is no
	// noVCSSuffix check on this entry.
	{
		prefix: "git.apache.org/",
		regexp: lazyregexp.New(`^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`),
		vcs:    "git",
		repo:   "https://{root}",
	},

	// Git at OpenStack
	// An optional ".git" may follow the repository name; it is matched
	// outside the "root" group.
	{
		prefix: "git.openstack.org/",
		regexp: lazyregexp.New(`^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`),
		vcs:    "git",
		repo:   "https://{root}",
	},

	// chiselapp.com for fossil
	// The pattern ends right after the repository name, so paths with
	// further components below the repository do not match.
	{
		prefix: "chiselapp.com/",
		regexp: lazyregexp.New(`^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`),
		vcs:    "fossil",
		repo:   "https://{root}",
	},

	// General syntax for any server.
	// Must be last.
	// The pattern has no prefix and no leading ^ anchor. It captures the
	// VCS name from the ".bzr", ".fossil", ".git", ".hg" or ".svn"
	// suffix in the "vcs" group and the path before that suffix in the
	// "repo" group.
	{
		regexp:         lazyregexp.New(`(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`),
		schemelessRepo: true,
	},
}
  1084  
// vcsPathsAfterDynamic gives additional vcsPaths entries
// to try after the dynamic HTML check.
// This gives those sites a chance to introduce <meta> tags
// as part of a graceful transition away from the hard-coded logic.
var vcsPathsAfterDynamic = []*vcsPath{
	// Launchpad. See golang.org/issue/11436.
	// The pattern accepts either "project" with an optional "/series"
	// component, or a "~user/project/branch" form in which the project
	// may be "+junk". launchpadVCS reads the "project" and "series"
	// groups to decide whether the series component is really part of
	// the repository root.
	{
		prefix: "launchpad.net/",
		regexp: lazyregexp.New(`^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
		vcs:    "bzr",
		repo:   "https://{root}",
		check:  launchpadVCS,
	},
}
  1099  
  1100  // noVCSSuffix checks that the repository name does not
  1101  // end in .foo for any version control system foo.
  1102  // The usual culprit is ".git".
  1103  func noVCSSuffix(match map[string]string) error {
  1104  	repo := match["repo"]
  1105  	for _, vcs := range vcsList {
  1106  		if strings.HasSuffix(repo, "."+vcs.cmd) {
  1107  			return fmt.Errorf("invalid version control suffix in %s path", match["prefix"])
  1108  		}
  1109  	}
  1110  	return nil
  1111  }
  1112  
  1113  // bitbucketVCS determines the version control system for a
  1114  // Bitbucket repository, by using the Bitbucket API.
  1115  func bitbucketVCS(match map[string]string) error {
  1116  	if err := noVCSSuffix(match); err != nil {
  1117  		return err
  1118  	}
  1119  
  1120  	var resp struct {
  1121  		SCM string `json:"scm"`
  1122  	}
  1123  	url := &urlpkg.URL{
  1124  		Scheme:   "https",
  1125  		Host:     "api.bitbucket.org",
  1126  		Path:     expand(match, "/2.0/repositories/{bitname}"),
  1127  		RawQuery: "fields=scm",
  1128  	}
  1129  	data, err := web.GetBytes(url)
  1130  	if err != nil {
  1131  		if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 {
  1132  			// this may be a private repository. If so, attempt to determine which
  1133  			// VCS it uses. See issue 5375.
  1134  			root := match["root"]
  1135  			for _, vcs := range []string{"git", "hg"} {
  1136  				if vcsByCmd(vcs).ping("https", root) == nil {
  1137  					resp.SCM = vcs
  1138  					break
  1139  				}
  1140  			}
  1141  		}
  1142  
  1143  		if resp.SCM == "" {
  1144  			return err
  1145  		}
  1146  	} else {
  1147  		if err := json.Unmarshal(data, &resp); err != nil {
  1148  			return fmt.Errorf("decoding %s: %v", url, err)
  1149  		}
  1150  	}
  1151  
  1152  	if vcsByCmd(resp.SCM) != nil {
  1153  		match["vcs"] = resp.SCM
  1154  		if resp.SCM == "git" {
  1155  			match["repo"] += ".git"
  1156  		}
  1157  		return nil
  1158  	}
  1159  
  1160  	return fmt.Errorf("unable to detect version control system for bitbucket.org/ path")
  1161  }
  1162  
  1163  // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case,
  1164  // "foo" could be a series name registered in Launchpad with its own branch,
  1165  // and it could also be the name of a directory within the main project
  1166  // branch one level up.
  1167  func launchpadVCS(match map[string]string) error {
  1168  	if match["project"] == "" || match["series"] == "" {
  1169  		return nil
  1170  	}
  1171  	url := &urlpkg.URL{
  1172  		Scheme: "https",
  1173  		Host:   "code.launchpad.net",
  1174  		Path:   expand(match, "/{project}{series}/.bzr/branch-format"),
  1175  	}
  1176  	_, err := web.GetBytes(url)
  1177  	if err != nil {
  1178  		match["root"] = expand(match, "launchpad.net/{project}")
  1179  		match["repo"] = expand(match, "https://{root}")
  1180  	}
  1181  	return nil
  1182  }
  1183  

View as plain text