Source file src/cmd/go/internal/modfetch/codehost/git.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/sha256"
    11  	"encoding/base64"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"io/fs"
    16  	"net/url"
    17  	"os"
    18  	"os/exec"
    19  	"path/filepath"
    20  	"runtime"
    21  	"slices"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"cmd/go/internal/base"
    29  	"cmd/go/internal/lockedfile"
    30  	"cmd/go/internal/par"
    31  	"cmd/go/internal/web"
    32  
    33  	"golang.org/x/mod/semver"
    34  )
    35  
    36  // LocalGitRepo is like Repo but accepts both Git remote references
    37  // and paths to repositories on the local file system.
    38  func LocalGitRepo(ctx context.Context, remote string) (Repo, error) {
    39  	return newGitRepoCached(ctx, remote, true)
    40  }
    41  
    42  // A notExistError wraps another error to retain its original text
    43  // but makes it opaquely equivalent to fs.ErrNotExist.
    44  type notExistError struct {
    45  	err error
    46  }
    47  
    48  func (e notExistError) Error() string   { return e.err.Error() }
    49  func (notExistError) Is(err error) bool { return err == fs.ErrNotExist }
    50  
// gitWorkDirType is the work directory type name that newGitRepo passes to
// WorkDir when setting up the local directory for a remote repository.
const gitWorkDirType = "git3"

// gitRepoCache holds the results of newGitRepo calls made through
// newGitRepoCached, keyed by gitCacheKey.
var gitRepoCache par.ErrCache[gitCacheKey, Repo]

// gitCacheKey is the key type of gitRepoCache: the arguments to newGitRepo
// other than the context.
type gitCacheKey struct {
	remote  string
	localOK bool
}
    59  
    60  func newGitRepoCached(ctx context.Context, remote string, localOK bool) (Repo, error) {
    61  	return gitRepoCache.Do(gitCacheKey{remote, localOK}, func() (Repo, error) {
    62  		return newGitRepo(ctx, remote, localOK)
    63  	})
    64  }
    65  
// newGitRepo returns a gitRepo for remote.
//
// If remote contains "://" it is treated as a remote URL. The repo's
// directory and lock file path come from WorkDir; while holding the lock,
// and only if the directory has no "objects" entry yet, a bare repository
// is initialized there with the URL registered as the remote named
// "origin" (plus core.longpaths on Windows). If any of those git commands
// fails, the directory is removed and the error returned. On success
// r.remoteURL holds the URL and r.remote is "origin".
//
// Otherwise remote is treated as a local path. It is rejected if it
// contains a colon, if localOK is false, or if it does not name an existing
// directory. The directory is used in place, with remote+".lock" as the
// lock file path.
func newGitRepo(ctx context.Context, remote string, localOK bool) (Repo, error) {
	r := &gitRepo{remote: remote}
	if strings.Contains(remote, "://") {
		// This is a remote path.
		var err error
		r.dir, r.mu.Path, err = WorkDir(ctx, gitWorkDirType, r.remote)
		if err != nil {
			return nil, err
		}

		unlock, err := r.mu.Lock()
		if err != nil {
			return nil, err
		}
		defer unlock()

		// A missing "objects" entry means the directory has not been
		// initialized as a git repository yet.
		if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
			if _, err := Run(ctx, r.dir, "git", "init", "--bare"); err != nil {
				os.RemoveAll(r.dir)
				return nil, err
			}
			// We could just say git fetch https://whatever later,
			// but this lets us say git fetch origin instead, which
			// is a little nicer. More importantly, using a named remote
			// avoids a problem with Git LFS. See golang.org/issue/25605.
			if _, err := Run(ctx, r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil {
				os.RemoveAll(r.dir)
				return nil, err
			}
			if runtime.GOOS == "windows" {
				// Git for Windows by default does not support paths longer than
				// MAX_PATH (260 characters) because that may interfere with navigation
				// in some Windows programs. However, cmd/go should be able to handle
				// long paths just fine, and we expect people to use 'go clean' to
				// manipulate the module cache, so it should be harmless to set here,
				// and in some cases may be necessary in order to download modules with
				// long branch names.
				//
				// See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path.
				if _, err := Run(ctx, r.dir, "git", "config", "core.longpaths", "true"); err != nil {
					os.RemoveAll(r.dir)
					return nil, err
				}
			}
		}
		// From here on the URL is kept in remoteURL, and git commands
		// refer to the repository by its remote name.
		r.remoteURL = r.remote
		r.remote = "origin"
	} else {
		// Local path.
		// Disallow colon (not in ://) because sometimes
		// that's rcp-style host:path syntax and sometimes it's not (c:\work).
		// The go command has always insisted on URL syntax for ssh.
		if strings.Contains(remote, ":") {
			return nil, fmt.Errorf("git remote cannot use host:path syntax")
		}
		if !localOK {
			return nil, fmt.Errorf("git remote must not be local directory")
		}
		r.local = true
		info, err := os.Stat(remote)
		if err != nil {
			return nil, err
		}
		if !info.IsDir() {
			return nil, fmt.Errorf("%s exists but is not a directory", remote)
		}
		r.dir = remote
		r.mu.Path = r.dir + ".lock"
	}
	return r, nil
}
   137  
// gitRepo is the Repo implementation returned by newGitRepo. It runs git
// commands in dir, which is either a bare repository set up for a remote
// URL or a caller-supplied local directory.
type gitRepo struct {
	// NOTE(review): ctx is neither set nor read in the code visible in this
	// file — confirm whether it is still needed.
	ctx context.Context

	// remote is the name passed to git to reach the repository: "origin"
	// for a remote URL, or the path itself for a local repository.
	// remoteURL is the URL for a remote repository; newGitRepo does not set
	// it for a local one.
	remote, remoteURL string
	local             bool   // set by newGitRepo for a local directory
	dir               string // directory in which git commands are run

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, fetchAll.
	fetchLevel int

	// statCache holds the results of stat, keyed by rev (see Stat).
	statCache par.ErrCache[string, *RevInfo]

	// refsOnce guards the single call that fills refs and refsErr (see loadRefs).
	refsOnce sync.Once
	// refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
	// to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
	refs    map[string]string
	refsErr error

	// localTagsOnce guards the single call to loadLocalTags, which fills
	// localTags with tag names from the local repository.
	localTagsOnce sync.Once
	localTags     sync.Map // map[string]bool
}
   160  
// Values for gitRepo.fetchLevel, in increasing order of how much has been
// fetched.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // "fetch -t origin": get all remote branches and tags
)
   167  
   168  // loadLocalTags loads tag references from the local git cache
   169  // into the map r.localTags.
   170  func (r *gitRepo) loadLocalTags(ctx context.Context) {
   171  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   172  	// so we might as well record both heads and tags in one shot.
   173  	// Most of the time we only care about tags but sometimes we care about heads too.
   174  	out, err := Run(ctx, r.dir, "git", "tag", "-l")
   175  	if err != nil {
   176  		return
   177  	}
   178  
   179  	for _, line := range strings.Split(string(out), "\n") {
   180  		if line != "" {
   181  			r.localTags.Store(line, true)
   182  		}
   183  	}
   184  }
   185  
   186  func (r *gitRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error {
   187  	if old == nil {
   188  		return fmt.Errorf("missing origin")
   189  	}
   190  	if old.VCS != "git" || old.URL != r.remoteURL {
   191  		return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL)
   192  	}
   193  	if old.Subdir != subdir {
   194  		return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir)
   195  	}
   196  
   197  	// Note: Can have Hash with no Ref and no TagSum and no RepoSum,
   198  	// meaning the Hash simply has to remain in the repo.
   199  	// In that case we assume it does in the absence of any real way to check.
   200  	// But if neither Hash nor TagSum is present, we have nothing to check,
   201  	// which we take to mean we didn't record enough information to be sure.
   202  	if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" {
   203  		return fmt.Errorf("non-specific origin")
   204  	}
   205  
   206  	r.loadRefs(ctx)
   207  	if r.refsErr != nil {
   208  		return r.refsErr
   209  	}
   210  
   211  	if old.Ref != "" {
   212  		hash, ok := r.refs[old.Ref]
   213  		if !ok {
   214  			return fmt.Errorf("ref %q deleted", old.Ref)
   215  		}
   216  		if hash != old.Hash {
   217  			return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash)
   218  		}
   219  	}
   220  	if old.TagSum != "" {
   221  		tags, err := r.Tags(ctx, old.TagPrefix)
   222  		if err != nil {
   223  			return err
   224  		}
   225  		if tags.Origin.TagSum != old.TagSum {
   226  			return fmt.Errorf("tags changed")
   227  		}
   228  	}
   229  	if old.RepoSum != "" {
   230  		if r.repoSum(r.refs) != old.RepoSum {
   231  			return fmt.Errorf("refs changed")
   232  		}
   233  	}
   234  	return nil
   235  }
   236  
// loadRefs loads heads and tags references from the remote into the map r.refs.
// The result is cached in memory.
//
// The first call runs "git ls-remote" against r.remote and stores the
// outcome in r.refs and r.refsErr; that call and every later one return those
// two fields. Only "HEAD", "refs/heads/..." and "refs/tags/..." entries are
// kept.
func (r *gitRepo) loadRefs(ctx context.Context) (map[string]string, error) {
	r.refsOnce.Do(func() {
		// The git protocol sends all known refs and ls-remote filters them on the client side,
		// so we might as well record both heads and tags in one shot.
		// Most of the time we only care about tags but sometimes we care about heads too.
		release, err := base.AcquireNet()
		if err != nil {
			r.refsErr = err
			return
		}
		out, gitErr := Run(ctx, r.dir, "git", "ls-remote", "-q", r.remote)
		release()

		if gitErr != nil {
			// Attach a hint when git's stderr shows it failed while
			// asking for a username.
			if rerr, ok := gitErr.(*RunError); ok {
				if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
					rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
				}
			}

			// If the remote URL doesn't exist at all, ideally we should treat the whole
			// repository as nonexistent by wrapping the error in a notExistError.
			// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
			// ourselves and see what code it serves.
			if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
				if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
					gitErr = notExistError{gitErr}
				}
			}

			r.refsErr = gitErr
			return
		}

		// Each line of interest has exactly two fields: hash, then ref name.
		refs := make(map[string]string)
		for _, line := range strings.Split(string(out), "\n") {
			f := strings.Fields(line)
			if len(f) != 2 {
				continue
			}
			if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
				refs[f[1]] = f[0]
			}
		}
		// Replace each "name^{}" entry by storing its hash under "name"
		// and dropping the suffixed key.
		for ref, hash := range refs {
			if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag
				refs[k] = hash
				delete(refs, ref)
			}
		}
		r.refs = refs
	})
	return r.refs, r.refsErr
}
   293  
   294  func (r *gitRepo) Tags(ctx context.Context, prefix string) (*Tags, error) {
   295  	refs, err := r.loadRefs(ctx)
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  
   300  	tags := &Tags{
   301  		Origin: &Origin{
   302  			VCS:       "git",
   303  			URL:       r.remoteURL,
   304  			TagPrefix: prefix,
   305  		},
   306  		List: []Tag{},
   307  	}
   308  	for ref, hash := range refs {
   309  		if !strings.HasPrefix(ref, "refs/tags/") {
   310  			continue
   311  		}
   312  		tag := ref[len("refs/tags/"):]
   313  		if !strings.HasPrefix(tag, prefix) {
   314  			continue
   315  		}
   316  		tags.List = append(tags.List, Tag{tag, hash})
   317  	}
   318  	sort.Slice(tags.List, func(i, j int) bool {
   319  		return tags.List[i].Name < tags.List[j].Name
   320  	})
   321  
   322  	dir := prefix[:strings.LastIndex(prefix, "/")+1]
   323  	h := sha256.New()
   324  	for _, tag := range tags.List {
   325  		if isOriginTag(strings.TrimPrefix(tag.Name, dir)) {
   326  			fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash)
   327  		}
   328  	}
   329  	tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   330  	return tags, nil
   331  }
   332  
   333  // repoSum returns a checksum of the entire repo state,
   334  // which can be checked (as Origin.RepoSum) to cache
   335  // the absence of a specific module version.
   336  // The caller must supply refs, the result of a successful r.loadRefs.
   337  func (r *gitRepo) repoSum(refs map[string]string) string {
   338  	var list []string
   339  	for ref := range refs {
   340  		list = append(list, ref)
   341  	}
   342  	sort.Strings(list)
   343  	h := sha256.New()
   344  	for _, ref := range list {
   345  		fmt.Fprintf(h, "%q %s\n", ref, refs[ref])
   346  	}
   347  	return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   348  }
   349  
   350  // unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs,
   351  // for use when returning an UnknownRevisionError.
   352  func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo {
   353  	return &RevInfo{
   354  		Origin: &Origin{
   355  			VCS:     "git",
   356  			URL:     r.remoteURL,
   357  			RepoSum: r.repoSum(refs),
   358  		},
   359  	}
   360  }
   361  
   362  func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) {
   363  	refs, err := r.loadRefs(ctx)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  	if refs["HEAD"] == "" {
   368  		return nil, ErrNoCommits
   369  	}
   370  	statInfo, err := r.Stat(ctx, refs["HEAD"])
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  
   375  	// Stat may return cached info, so make a copy to modify here.
   376  	info := new(RevInfo)
   377  	*info = *statInfo
   378  	info.Origin = new(Origin)
   379  	if statInfo.Origin != nil {
   380  		*info.Origin = *statInfo.Origin
   381  	}
   382  	info.Origin.Ref = "HEAD"
   383  	info.Origin.Hash = refs["HEAD"]
   384  
   385  	return info, nil
   386  }
   387  
   388  // findRef finds some ref name for the given hash,
   389  // for use when the server requires giving a ref instead of a hash.
   390  // There may be multiple ref names for a given hash,
   391  // in which case this returns some name - it doesn't matter which.
   392  func (r *gitRepo) findRef(ctx context.Context, hash string) (ref string, ok bool) {
   393  	refs, err := r.loadRefs(ctx)
   394  	if err != nil {
   395  		return "", false
   396  	}
   397  	for ref, h := range refs {
   398  		if h == hash {
   399  			return ref, true
   400  		}
   401  	}
   402  	return "", false
   403  }
   404  
// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.)
//
// stat treats a revision as a possible hash only if it is all hex and its
// length is between minHashDigits and 40 inclusive.
const minHashDigits = 7
   412  
// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// For a local repository it is just statLocal. Otherwise the steps are, in
// order: a local lookup if rev looks like a hash; a local lookup if rev is a
// locally known tag; resolving rev against the remote's refs; another local
// lookup under the lock; a shallow fetch of the one resolved ref; and finally
// a fetch of all heads and tags.
//
// When rev matches no remote ref and does not look like a hash, stat returns
// an UnknownRevisionError together with a RevInfo carrying only an Origin
// with a RepoSum.
func (r *gitRepo) stat(ctx context.Context, rev string) (info *RevInfo, err error) {
	if r.local {
		return r.statLocal(ctx, rev, rev)
	}

	// Fast path: maybe rev is a hash we already have locally.
	didStatLocal := false
	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		if info, err := r.statLocal(ctx, rev, rev); err == nil {
			return info, nil
		}
		didStatLocal = true
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(func() { r.loadLocalTags(ctx) })
	if _, ok := r.localTags.Load(rev); ok {
		return r.statLocal(ctx, rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	refs, err := r.loadRefs(ctx)
	if err != nil {
		return nil, err
	}
	// Note that a loadRefs failure (for example git crashing, or a private
	// repo that cannot be read) has already been returned just above, so
	// refs is valid from here on.
	var ref, hash string
	if refs["refs/tags/"+rev] != "" {
		ref = "refs/tags/" + rev
		hash = refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	} else if refs["refs/heads/"+rev] != "" {
		ref = "refs/heads/" + rev
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	} else if rev == "HEAD" && refs["HEAD"] != "" {
		ref = "HEAD"
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					return nil, fmt.Errorf("ambiguous revision %s", rev)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	} else {
		return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev}
	}

	// Whatever path returns below, stamp the resolved hash and ref onto the
	// result's Origin. This runs on every return after this point, including
	// error returns that carry a non-nil info.
	defer func() {
		if info != nil {
			info.Origin.Hash = info.Name
			// There's a ref = hash below; don't write that hash down as Origin.Ref.
			if ref != info.Origin.Hash {
				info.Origin.Ref = ref
			}
		}
	}()

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	// Perhaps r.localTags did not have the ref when we loaded local tags,
	// but we've since done fetches that pulled down the hash we need
	// (or already have the hash we need, just without its tag).
	// Either way, try a local stat before falling back to network I/O.
	if !didStatLocal {
		if info, err := r.statLocal(ctx, rev, hash); err == nil {
			tag, fromTag := strings.CutPrefix(ref, "refs/tags/")
			if fromTag && !slices.Contains(info.Tags, tag) {
				// The local repo includes the commit hash we want, but it is missing
				// the corresponding tag. Add that tag and try again.
				_, err := Run(ctx, r.dir, "git", "tag", tag, hash)
				if err != nil {
					return nil, err
				}
				r.localTags.Store(tag, true)
				return r.statLocal(ctx, rev, ref)
			}
			return info, err
		}
	}

	// If we know a specific commit we need and its ref, fetch it.
	// We do NOT fetch arbitrary hashes (when we don't know the ref)
	// because we want to avoid ever importing a commit that isn't
	// reachable from refs/tags/* or refs/heads/* or HEAD.
	// Both Gerrit and GitHub expose every CL/PR as a named ref,
	// and we don't want those commits masquerading as being real
	// pseudo-versions in the main repo.
	//
	// (The !r.local test is always true here: local repos returned at the
	// top of the function.)
	if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local {
		r.fetchLevel = fetchSome
		var refspec string
		if ref == "HEAD" {
			// Fetch the hash but give it a local name (refs/dummy),
			// because that triggers the fetch behavior of creating any
			// other known remote tags for the hash. We never use
			// refs/dummy (it's not refs/tags/dummy) and it will be
			// overwritten in the next command, and that's fine.
			ref = hash
			refspec = hash + ":refs/dummy"
		} else {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		}

		release, err := base.AcquireNet()
		if err != nil {
			return nil, err
		}
		// We explicitly set protocol.version=2 for this command to work around
		// an apparent Git bug introduced in Git 2.21 (commit 61c771),
		// which causes the handler for protocol version 1 to sometimes miss
		// tags that point to the requested commit (see https://go.dev/issue/56881).
		_, err = Run(ctx, r.dir, "git", "-c", "protocol.version=2", "fetch", "-f", "--depth=1", r.remote, refspec)
		release()

		if err == nil {
			return r.statLocal(ctx, rev, ref)
		}
		// Don't try to be smart about parsing the error.
		// It's too complex and varies too much by git version.
		// No matter what went wrong, fall back to a complete fetch.
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if err := r.fetchRefsLocked(ctx); err != nil {
		return nil, err
	}

	return r.statLocal(ctx, rev, rev)
}
   576  
   577  // fetchRefsLocked fetches all heads and tags from the origin, along with the
   578  // ancestors of those commits.
   579  //
   580  // We only fetch heads and tags, not arbitrary other commits: we don't want to
   581  // pull in off-branch commits (such as rejected GitHub pull requests) that the
   582  // server may be willing to provide. (See the comments within the stat method
   583  // for more detail.)
   584  //
   585  // fetchRefsLocked requires that r.mu remain locked for the duration of the call.
   586  func (r *gitRepo) fetchRefsLocked(ctx context.Context) error {
   587  	if r.fetchLevel < fetchAll {
   588  		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
   589  		// (2019-08-16), we must first expand the set of local refs, and only then
   590  		// unshallow the repository as a separate fetch operation. (See
   591  		// golang.org/issue/34266 and
   592  		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
   593  
   594  		release, err := base.AcquireNet()
   595  		if err != nil {
   596  			return err
   597  		}
   598  		defer release()
   599  
   600  		if _, err := Run(ctx, r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
   601  			return err
   602  		}
   603  
   604  		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
   605  			if _, err := Run(ctx, r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
   606  				return err
   607  			}
   608  		}
   609  
   610  		r.fetchLevel = fetchAll
   611  	}
   612  	return nil
   613  }
   614  
   615  // statLocal returns a new RevInfo describing rev in the local git repository.
   616  // It uses version as info.Version.
   617  func (r *gitRepo) statLocal(ctx context.Context, version, rev string) (*RevInfo, error) {
   618  	out, err := Run(ctx, r.dir, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--")
   619  	if err != nil {
   620  		// Return info with Origin.RepoSum if possible to allow caching of negative lookup.
   621  		var info *RevInfo
   622  		if refs, err := r.loadRefs(ctx); err == nil {
   623  			info = r.unknownRevisionInfo(refs)
   624  		}
   625  		return info, &UnknownRevisionError{Rev: rev}
   626  	}
   627  	f := strings.Fields(string(out))
   628  	if len(f) < 2 {
   629  		return nil, fmt.Errorf("unexpected response from git log: %q", out)
   630  	}
   631  	hash := f[0]
   632  	if strings.HasPrefix(hash, version) {
   633  		version = hash // extend to full hash
   634  	}
   635  	t, err := strconv.ParseInt(f[1], 10, 64)
   636  	if err != nil {
   637  		return nil, fmt.Errorf("invalid time from git log: %q", out)
   638  	}
   639  
   640  	info := &RevInfo{
   641  		Origin: &Origin{
   642  			VCS:  "git",
   643  			URL:  r.remoteURL,
   644  			Hash: hash,
   645  		},
   646  		Name:    hash,
   647  		Short:   ShortenSHA1(hash),
   648  		Time:    time.Unix(t, 0).UTC(),
   649  		Version: hash,
   650  	}
   651  	if !strings.HasPrefix(hash, rev) {
   652  		info.Origin.Ref = rev
   653  	}
   654  
   655  	// Add tags. Output looks like:
   656  	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
   657  	for i := 2; i < len(f); i++ {
   658  		if f[i] == "tag:" {
   659  			i++
   660  			if i < len(f) {
   661  				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
   662  			}
   663  		}
   664  	}
   665  	sort.Strings(info.Tags)
   666  
   667  	// Used hash as info.Version above.
   668  	// Use caller's suggested version if it appears in the tag list
   669  	// (filters out branch names, HEAD).
   670  	for _, tag := range info.Tags {
   671  		if version == tag {
   672  			info.Version = version
   673  		}
   674  	}
   675  
   676  	return info, nil
   677  }
   678  
   679  func (r *gitRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
   680  	if rev == "latest" {
   681  		return r.Latest(ctx)
   682  	}
   683  	return r.statCache.Do(rev, func() (*RevInfo, error) {
   684  		return r.stat(ctx, rev)
   685  	})
   686  }
   687  
   688  func (r *gitRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) {
   689  	// TODO: Could use git cat-file --batch.
   690  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   691  	if err != nil {
   692  		return nil, err
   693  	}
   694  	out, err := Run(ctx, r.dir, "git", "cat-file", "blob", info.Name+":"+file)
   695  	if err != nil {
   696  		return nil, fs.ErrNotExist
   697  	}
   698  	return out, nil
   699  }
   700  
// RecentTag returns the tag that is merged into rev, begins with prefix, is
// accepted by allowed, and whose remainder after prefix compares highest
// under semver.Compare. It returns "" if no such tag is found.
//
// It first asks the local repository. If that gives no definitive answer and
// the remote has at least one tag beginning with prefix+"v", it fetches all
// heads and tags (holding r.mu) and asks again.
func (r *gitRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) {
	info, err := r.Stat(ctx, rev)
	if err != nil {
		return "", err
	}
	rev = info.Name // expand hash prefixes

	// describe sets tag and err using 'git for-each-ref' and reports whether the
	// result is definitive.
	// A git failure counts as definitive (err is set); otherwise the result
	// is definitive only if a tag was found and it is not all hex digits.
	describe := func() (definitive bool) {
		var out []byte
		out, err = Run(ctx, r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
		if err != nil {
			return true
		}

		// prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
		var highest string
		for _, line := range strings.Split(string(out), "\n") {
			line = strings.TrimSpace(line)
			// git does support lstrip in the for-each-ref format, but it was added in v2.13.0. Stripping here
			// instead gives support for git v2.7.0.
			if !strings.HasPrefix(line, "refs/tags/") {
				continue
			}
			line = line[len("refs/tags/"):]

			if !strings.HasPrefix(line, prefix) {
				continue
			}
			if !allowed(line) {
				continue
			}

			semtag := line[len(prefix):]
			if semver.Compare(semtag, highest) > 0 {
				highest = semtag
			}
		}

		if highest != "" {
			tag = prefix + highest
		}

		return tag != "" && !AllHex(tag)
	}

	if describe() {
		return tag, err
	}

	// Git didn't find a version tag preceding the requested rev.
	// See whether any plausible tag exists.
	tags, err := r.Tags(ctx, prefix+"v")
	if err != nil {
		return "", err
	}
	if len(tags.List) == 0 {
		return "", nil
	}

	// There are plausible tags, but we don't know if rev is a descendent of any of them.
	// Fetch the history to find out.

	unlock, err := r.mu.Lock()
	if err != nil {
		return "", err
	}
	defer unlock()

	if err := r.fetchRefsLocked(ctx); err != nil {
		return "", err
	}

	// If we've reached this point, we have all of the commits that are reachable
	// from all heads and tags.
	//
	// The only refs we should be missing are those that are no longer reachable
	// (or never were reachable) from any branch or tag, including the master
	// branch, and we don't want to resolve them anyway (they're probably
	// unreachable for a reason).
	//
	// Try one last time in case some other goroutine fetched rev while we were
	// waiting on the lock.
	describe()
	return tag, err
}
   788  
   789  func (r *gitRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) {
   790  	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
   791  	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
   792  	// already doesn't work with Git 1.7.1, so at least it's not a regression.
   793  	//
   794  	// git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or
   795  	// 1 if not.
   796  	_, err := Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   797  
   798  	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
   799  	// exit code 1.
   800  	// Unfortunately, if we've already fetched rev with a shallow history, git
   801  	// merge-base has been observed to report a false-negative, so don't stop yet
   802  	// even if the exit code is 1!
   803  	if err == nil {
   804  		return true, nil
   805  	}
   806  
   807  	// See whether the tag and rev even exist.
   808  	tags, err := r.Tags(ctx, tag)
   809  	if err != nil {
   810  		return false, err
   811  	}
   812  	if len(tags.List) == 0 {
   813  		return false, nil
   814  	}
   815  
   816  	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
   817  	// about, like rejected GitHub pull requests, so don't try to short-circuit
   818  	// that here.
   819  	if _, err = r.stat(ctx, rev); err != nil {
   820  		return false, err
   821  	}
   822  
   823  	// Now fetch history so that git can search for a path.
   824  	unlock, err := r.mu.Lock()
   825  	if err != nil {
   826  		return false, err
   827  	}
   828  	defer unlock()
   829  
   830  	if r.fetchLevel < fetchAll {
   831  		// Fetch the complete history for all refs and heads. It would be more
   832  		// efficient to only fetch the history from rev to tag, but that's much more
   833  		// complicated, and any kind of shallow fetch is fairly likely to trigger
   834  		// bugs in JGit servers and/or the go command anyway.
   835  		if err := r.fetchRefsLocked(ctx); err != nil {
   836  			return false, err
   837  		}
   838  	}
   839  
   840  	_, err = Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   841  	if err == nil {
   842  		return true, nil
   843  	}
   844  	if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
   845  		return false, nil
   846  	}
   847  	return false, err
   848  }
   849  
   850  func (r *gitRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
   851  	// TODO: Use maxSize or drop it.
   852  	args := []string{}
   853  	if subdir != "" {
   854  		args = append(args, "--", subdir)
   855  	}
   856  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   857  	if err != nil {
   858  		return nil, err
   859  	}
   860  
   861  	unlock, err := r.mu.Lock()
   862  	if err != nil {
   863  		return nil, err
   864  	}
   865  	defer unlock()
   866  
   867  	if err := ensureGitAttributes(r.dir); err != nil {
   868  		return nil, err
   869  	}
   870  
   871  	// Incredibly, git produces different archives depending on whether
   872  	// it is running on a Windows system or not, in an attempt to normalize
   873  	// text file line endings. Setting -c core.autocrlf=input means only
   874  	// translate files on the way into the repo, not on the way out (archive).
   875  	// The -c core.eol=lf should be unnecessary but set it anyway.
   876  	archive, err := Run(ctx, r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
   877  	if err != nil {
   878  		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
   879  			return nil, fs.ErrNotExist
   880  		}
   881  		return nil, err
   882  	}
   883  
   884  	return io.NopCloser(bytes.NewReader(archive)), nil
   885  }
   886  
   887  // ensureGitAttributes makes sure export-subst and export-ignore features are
   888  // disabled for this repo. This is intended to be run prior to running git
   889  // archive so that zip files are generated that produce consistent ziphashes
   890  // for a given revision, independent of variables such as git version and the
   891  // size of the repo.
   892  //
   893  // See: https://github.com/golang/go/issues/27153
   894  func ensureGitAttributes(repoDir string) (err error) {
   895  	const attr = "\n* -export-subst -export-ignore\n"
   896  
   897  	d := repoDir + "/info"
   898  	p := d + "/attributes"
   899  
   900  	if err := os.MkdirAll(d, 0755); err != nil {
   901  		return err
   902  	}
   903  
   904  	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
   905  	if err != nil {
   906  		return err
   907  	}
   908  	defer func() {
   909  		closeErr := f.Close()
   910  		if closeErr != nil {
   911  			err = closeErr
   912  		}
   913  	}()
   914  
   915  	b, err := io.ReadAll(f)
   916  	if err != nil {
   917  		return err
   918  	}
   919  	if !bytes.HasSuffix(b, []byte(attr)) {
   920  		_, err := f.WriteString(attr)
   921  		return err
   922  	}
   923  
   924  	return nil
   925  }
   926  

View as plain text