// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package codehost defines the interface implemented by a code hosting source,
// along with support code for use by implementations.
package codehost

import (
	"bytes"
	"context"
	"crypto/sha256"
	"fmt"
	"io"
	"io/fs"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"cmd/go/internal/cfg"
	"cmd/go/internal/lockedfile"
	"cmd/go/internal/str"

	"golang.org/x/mod/module"
	"golang.org/x/mod/semver"
)

// Downloaded size limits.
const (
	MaxGoMod   = 16 << 20  // maximum size of go.mod file
	MaxLICENSE = 16 << 20  // maximum size of LICENSE file
	MaxZipFile = 500 << 20 // maximum size of downloaded zip file
)

// A Repo represents a code hosting source.
// Typical implementations include local version control repositories,
// remote version control servers, and code hosting sites.
//
// A Repo must be safe for simultaneous use by multiple goroutines,
// and callers must not modify returned values, which may be cached and shared.
type Repo interface {
	// CheckReuse checks whether the old origin information
	// remains up to date. If so, whatever cached object it was
	// taken from can be reused.
	// The subdir argument gives the name of the subdirectory where the
	// module root is expected to be found: "" for the repository root,
	// or "sub/dir" for a subdirectory (no trailing slash).
	CheckReuse(ctx context.Context, old *Origin, subdir string) error

	// Tags lists all tags with the given prefix.
	Tags(ctx context.Context, prefix string) (*Tags, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(ctx context.Context, rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying implementation.
	Latest(ctx context.Context) (*RevInfo, error)

	// ReadFile reads the given file in the file tree corresponding to revision rev.
	// It should refuse to read more than maxSize bytes.
	//
	// If the requested file does not exist it should return an error for which
	// os.IsNotExist(err) returns true.
	ReadFile(ctx context.Context, rev, file string, maxSize int64) (data []byte, err error)

	// ReadZip downloads a zip file for the subdir subdirectory
	// of the given revision.
	// It should refuse to read more than maxSize bytes.
	// It returns a ReadCloser for a streamed copy of the zip file.
	// All files in the zip file are expected to be
	// nested in a single top-level directory, whose name is not specified.
	ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error)

	// RecentTag returns the most recent tag on rev or one of its predecessors
	// with the given prefix. allowed may be used to filter out unwanted versions.
	RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error)

	// DescendsFrom reports whether rev or any of its ancestors has the given tag.
	//
	// DescendsFrom must return true for any tag returned by RecentTag for the
	// same revision.
	DescendsFrom(ctx context.Context, rev, tag string) (bool, error)
}

// An Origin describes the provenance of a given repo method result.
// It can be passed to CheckReuse (usually in a different go command invocation)
// to see whether the result remains up-to-date.
type Origin struct {
	VCS    string `json:",omitempty"` // "git" etc
	URL    string `json:",omitempty"` // URL of repository
	Subdir string `json:",omitempty"` // subdirectory in repo

	Hash string `json:",omitempty"` // commit hash or ID

	// If TagSum is non-empty, then the resolution of this module version
	// depends on the set of tags present in the repo, specifically the tags
	// of the form TagPrefix + a valid semver version.
	// If the matching repo tags and their commit hashes still hash to TagSum,
	// the Origin is still valid (at least as far as the tags are concerned).
	// The exact checksum is up to the Repo implementation; see (*gitRepo).Tags.
	TagPrefix string `json:",omitempty"`
	TagSum    string `json:",omitempty"`

	// If Ref is non-empty, then the resolution of this module version
	// depends on Ref resolving to the revision identified by Hash.
	// If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned).
	// For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3",
	// and the Hash is the Git object hash the ref maps to.
	// Other VCS might choose differently, but the idea is that Ref is the name
	// with a mutable meaning while Hash is a name with an immutable meaning.
	Ref string `json:",omitempty"`

	// If RepoSum is non-empty, then the resolution of this module version
	// failed due to the repo being available but the version not being present.
	// This depends on the entire state of the repo, which RepoSum summarizes.
	// For Git, this is a hash of all the refs and their hashes.
	RepoSum string `json:",omitempty"`
}

// A Tags describes the available tags in a code repository.
type Tags struct {
	Origin *Origin // provenance of the tag list, for use with CheckReuse
	List   []Tag   // the tags themselves
}

// A Tag describes a single tag in a code repository.
type Tag struct {
	Name string
	Hash string // content hash identifying tag's content, if available
}

// isOriginTag reports whether tag should be preserved
// in the Tags method's Origin calculation.
// We can safely ignore tags that look like pseudo-versions,
// because ../coderepo.go's (*codeRepo).Versions ignores them too.
// We can also ignore non-semver tags, but we have to include semver
// tags with extra suffixes, because the pseudo-version base finder uses them.
func isOriginTag(tag string) bool { // modfetch.(*codeRepo).Versions uses Canonical == tag, // but pseudo-version calculation has a weaker condition that // the canonical is a prefix of the tag. // Include those too, so that if any new one appears, we'll invalidate the cache entry. // This will lead to spurious invalidation of version list results, // but tags of this form being created should be fairly rare // (and invalidate pseudo-version results anyway). c := semver.Canonical(tag) return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag) } // A RevInfo describes a single revision in a source code repository. type RevInfo struct { Origin *Origin Name string // complete ID in underlying repository Short string // shortened ID, for use in pseudo-version Version string // version used in lookup Time time.Time // commit time Tags []string // known tags for commit } // UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a // revision rather than a file. type UnknownRevisionError struct { Rev string } func (e *UnknownRevisionError) Error() string { return "unknown revision " + e.Rev } func (UnknownRevisionError) Is(err error) bool { return err == fs.ErrNotExist } // ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given // repository or module contains no commits. var ErrNoCommits error = noCommitsError{} type noCommitsError struct{} func (noCommitsError) Error() string { return "no commits" } func (noCommitsError) Is(err error) bool { return err == fs.ErrNotExist } // AllHex reports whether the revision rev is entirely lower-case hexadecimal digits. func AllHex(rev string) bool { for i := 0; i < len(rev); i++ { c := rev[i] if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' { continue } return false } return true } // ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length // used in pseudo-versions (12 hex digits). 
func ShortenSHA1(rev string) string { if AllHex(rev) && len(rev) == 40 { return rev[:12] } return rev } // WorkDir returns the name of the cached work directory to use for the // given repository type and name. func WorkDir(ctx context.Context, typ, name string) (dir, lockfile string, err error) { if cfg.GOMODCACHE == "" { return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set") } // We name the work directory for the SHA256 hash of the type and name. // We intentionally avoid the actual name both because of possible // conflicts with valid file system paths and because we want to ensure // that one checkout is never nested inside another. That nesting has // led to security problems in the past. if strings.Contains(typ, ":") { return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") } key := typ + ":" + name dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) xLog, buildX := cfg.BuildXWriter(ctx) if buildX { fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name) } if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil { return "", "", err } lockfile = dir + ".lock" if buildX { fmt.Fprintf(xLog, "# lock %s\n", lockfile) } unlock, err := lockedfile.MutexAt(lockfile).Lock() if err != nil { return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err) } defer unlock() data, err := os.ReadFile(dir + ".info") info, err2 := os.Stat(dir) if err == nil && err2 == nil && info.IsDir() { // Info file and directory both already exist: reuse. have := strings.TrimSuffix(string(data), "\n") if have != key { return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) } if buildX { fmt.Fprintf(xLog, "# %s for %s %s\n", dir, typ, name) } return dir, lockfile, nil } // Info file or directory missing. Start from scratch. 
if xLog != nil { fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", dir, typ, name) } os.RemoveAll(dir) if err := os.MkdirAll(dir, 0777); err != nil { return "", "", err } if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil { os.RemoveAll(dir) return "", "", err } return dir, lockfile, nil } type RunError struct { Cmd string Err error Stderr []byte HelpText string } func (e *RunError) Error() string { text := e.Cmd + ": " + e.Err.Error() stderr := bytes.TrimRight(e.Stderr, "\n") if len(stderr) > 0 { text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t") } if len(e.HelpText) > 0 { text += "\n" + e.HelpText } return text } var dirLock sync.Map // Run runs the command line in the given directory // (an empty dir means the current directory). // It returns the standard output and, for a non-zero exit, // a *RunError indicating the command, exit status, and standard error. // Standard error is unavailable for commands that exit successfully. func Run(ctx context.Context, dir string, cmdline ...any) ([]byte, error) { return RunWithStdin(ctx, dir, nil, cmdline...) } // bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. // See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) func RunWithStdin(ctx context.Context, dir string, stdin io.Reader, cmdline ...any) ([]byte, error) { if dir != "" { muIface, ok := dirLock.Load(dir) if !ok { muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) } mu := muIface.(*sync.Mutex) mu.Lock() defer mu.Unlock() } cmd := str.StringList(cmdline...) 
if os.Getenv("TESTGOVCS") == "panic" { panic(fmt.Sprintf("use of vcs: %v", cmd)) } if xLog, ok := cfg.BuildXWriter(ctx); ok { text := new(strings.Builder) if dir != "" { text.WriteString("cd ") text.WriteString(dir) text.WriteString("; ") } for i, arg := range cmd { if i > 0 { text.WriteByte(' ') } switch { case strings.ContainsAny(arg, "'"): // Quote args that could be mistaken for quoted args. text.WriteByte('"') text.WriteString(bashQuoter.Replace(arg)) text.WriteByte('"') case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): // Quote args that contain special characters, glob patterns, or spaces. text.WriteByte('\'') text.WriteString(arg) text.WriteByte('\'') default: text.WriteString(arg) } } fmt.Fprintf(xLog, "%s\n", text) start := time.Now() defer func() { fmt.Fprintf(xLog, "%.3fs # %s\n", time.Since(start).Seconds(), text) }() } // TODO: Impose limits on command output size. // TODO: Set environment to get English error messages. var stderr bytes.Buffer var stdout bytes.Buffer c := exec.CommandContext(ctx, cmd[0], cmd[1:]...) c.Cancel = func() error { return c.Process.Signal(os.Interrupt) } c.Dir = dir c.Stdin = stdin c.Stderr = &stderr c.Stdout = &stdout // For Git commands, manually supply GIT_DIR so Git works with safe.bareRepository=explicit set. Noop for other commands. c.Env = append(c.Environ(), "GIT_DIR="+dir) err := c.Run() if err != nil { err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} } return stdout.Bytes(), err }