// Package dehub TODO needs package docs package dehub import ( "bytes" "encoding/hex" "errors" "fmt" "io" "os" "path/filepath" "strings" "dehub.dev/src/dehub.git/fs" "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/config" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/storage" "gopkg.in/src-d/go-git.v4/storage/filesystem" ) const ( // DehubDir defines the name of the directory where all dehub-related files are // expected to be found. DehubDir = ".dehub" ) var ( // ConfigPath defines the expected path to the Repo's configuration file. ConfigPath = filepath.Join(DehubDir, "config.yml") // Main defines the name of the main branch. Main = "main" // MainRefName defines the reference name of the main branch. MainRefName = plumbing.NewBranchReferenceName(Main) ) type openOpts struct { bare bool } // OpenOption is an option which can be passed to the OpenRepo function to // affect the Repo's behavior. type OpenOption func(*openOpts) // OpenBare returns an OpenOption which, if true is given, causes the OpenRepo // function to expect to open a bare repo. func OpenBare(bare bool) OpenOption { return func(o *openOpts) { o.bare = bare } } // Repo is an object which allows accessing and modifying the dehub repo. type Repo struct { GitRepo *git.Repository // GitDirFS corresponds to the .git directory (or the entire repo directory // if it's a bare repo) GitDirFS billy.Filesystem } func extractGitDirFS(storer storage.Storer) (billy.Filesystem, error) { dotGitFSer, ok := storer.(interface{ Filesystem() billy.Filesystem }) if !ok { return nil, fmt.Errorf("git storage object of type %T does not expose its underlying filesystem", storer) } return dotGitFSer.Filesystem(), nil } // OpenRepo opens the dehub repo in the given directory and returns the object // for it. // // The given path is expected to have a git repo and .dehub folder already // initialized. func OpenRepo(path string, options ...OpenOption) (*Repo, error) { var opts openOpts for _, opt := range options { opt(&opts) } r := Repo{} var err error openOpts := &git.PlainOpenOptions{ DetectDotGit: !opts.bare, } if r.GitRepo, err = git.PlainOpenWithOptions(path, openOpts); err != nil { return nil, fmt.Errorf("could not open git repo: %w", err) } else if r.GitDirFS, err = extractGitDirFS(r.GitRepo.Storer); err != nil { return nil, err } return &r, nil } type initOpts struct { bare bool remote bool } // InitOption is an option which can be passed into the Init functions to affect // their behavior. type InitOption func(*initOpts) // InitBare returns an InitOption which, if true is given, causes the Init // function to initialize the repo without a worktree. func InitBare(bare bool) InitOption { return func(o *initOpts) { o.bare = bare } } // InitRemote returns an InitOption which, if true is given, causes the Init // function to initialize the repo with certain git configuration options set // which make the repo able to be used as a remote repo. func InitRemote(remote bool) InitOption { return func(o *initOpts) { o.remote = remote } } // InitRepo will initialize a new repository at the given path. If bare is true // then the repository will not have a worktree. func InitRepo(path string, options ...InitOption) (*Repo, error) { var opts initOpts for _, opt := range options { opt(&opts) } var repo Repo var err error if repo.GitRepo, err = git.PlainInit(path, opts.bare); err != nil { return nil, fmt.Errorf("initializing git repo: %w", err) } else if repo.GitDirFS, err = extractGitDirFS(repo.GitRepo.Storer); err != nil { return nil, err } else if err = repo.init(opts); err != nil { return nil, fmt.Errorf("initializing repo with dehub defaults: %w", err) } return &repo, nil } // InitMemRepo initializes an empty repository which only exists in memory. func InitMemRepo(options ...InitOption) *Repo { var opts initOpts for _, opt := range options { opt(&opts) } fs := memfs.New() dotGitFS, err := fs.Chroot(git.GitDirName) if err != nil { panic(err) } storage := filesystem.NewStorage(dotGitFS, cache.NewObjectLRUDefault()) var worktree billy.Filesystem if !opts.bare { worktree = fs } r, err := git.Init(storage, worktree) if err != nil { panic(err) } repo := &Repo{GitRepo: r, GitDirFS: dotGitFS} if err := repo.init(opts); err != nil { panic(err) } return repo } func (r *Repo) initRemotePreReceive(bare bool) error { if err := r.GitDirFS.MkdirAll("hooks", 0755); err != nil { return fmt.Errorf("creating hooks directory: %w", err) } preRcvFlags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC preRcv, err := r.GitDirFS.OpenFile("hooks/pre-receive", preRcvFlags, 0755) if err != nil { return fmt.Errorf("opening hooks/pre-receive file: %w", err) } defer preRcv.Close() var preRcvBody string if bare { preRcvBody = "#!/bin/sh\nexec dehub hook -bare -pre-receive\n" } else { preRcvBody = "#!/bin/sh\nexec dehub hook -pre-receive\n" } if _, err := io.Copy(preRcv, bytes.NewBufferString(preRcvBody)); err != nil { return fmt.Errorf("writing to hooks/pre-receive: %w", err) } return nil } func (r *Repo) init(opts initOpts) error { headRef := plumbing.NewSymbolicReference(plumbing.HEAD, MainRefName) if err := r.GitRepo.Storer.SetReference(headRef); err != nil { return fmt.Errorf("setting HEAD reference to %q: %w", MainRefName, err) } if opts.remote { cfg, err := r.GitRepo.Config() if err != nil { return fmt.Errorf("opening git cfg: %w", err) } cfg.Raw = cfg.Raw.AddOption("http", config.NoSubsection, "receivepack", "true") if err := r.GitRepo.Storer.SetConfig(cfg); err != nil { return fmt.Errorf("storing modified git config: %w", err) } if err := r.initRemotePreReceive(opts.bare); err != nil { return fmt.Errorf("initializing pre-receive hook for remote-enabled repo: %w", err) } } return nil } func (r *Repo) billyFilesystem() (billy.Filesystem, error) { w, err := r.GitRepo.Worktree() if err != nil { return nil, fmt.Errorf("opening git worktree: %w", err) } return w.Filesystem, nil } var errTraverseRefNoMatch = errors.New("failed to find reference matching given predicate") // TraverseReferenceChain resolves a chain of references, calling the given // predicate on each one, and returning the first one for which the predicate // returns true. This method will return an error if it reaches the end of the // chain and the predicate still has not returned true. // // If a reference name is encountered which does not actually exist, then it is // assumed to be a hash reference to the zero hash. func (r *Repo) TraverseReferenceChain(refName plumbing.ReferenceName, pred func(*plumbing.Reference) bool) (*plumbing.Reference, error) { // TODO infinite loop checking for { ref, err := r.GitRepo.Storer.Reference(refName) if errors.Is(err, plumbing.ErrReferenceNotFound) { ref = plumbing.NewHashReference(refName, plumbing.ZeroHash) } else if err != nil { return nil, fmt.Errorf("resolving reference %q: %w", refName, err) } if pred(ref) { return ref, nil } else if ref.Type() != plumbing.SymbolicReference { return nil, errTraverseRefNoMatch } refName = ref.Target() } } // ErrNoBranchReference is returned from ReferenceToBranchName if no reference // in the reference chain is for a branch. var ErrNoBranchReference = errors.New("no branch reference found") // ReferenceToBranchName traverses a chain of references looking for the first // branch reference, and returns that name, or returns ErrNoBranchReference if // no branch reference is part of the chain. func (r *Repo) ReferenceToBranchName(refName plumbing.ReferenceName) (plumbing.ReferenceName, error) { // first check if the given refName is a branch, if so just return that. if refName.IsBranch() { return refName, nil } ref, err := r.TraverseReferenceChain(refName, func(ref *plumbing.Reference) bool { return ref.Target().IsBranch() }) if errors.Is(err, errTraverseRefNoMatch) { return "", ErrNoBranchReference } else if err != nil { return "", fmt.Errorf("traversing reference chain: %w", err) } return ref.Target(), nil } // ReferenceToHash fully resolves a reference to a hash. If a reference cannot // be resolved then plumbing.ZeroHash is returned. func (r *Repo) ReferenceToHash(refName plumbing.ReferenceName) (plumbing.Hash, error) { ref, err := r.TraverseReferenceChain(refName, func(ref *plumbing.Reference) bool { return ref.Type() == plumbing.HashReference }) if errors.Is(err, errTraverseRefNoMatch) { return plumbing.ZeroHash, errors.New("no hash in reference chain (is this even possible???)") } else if errors.Is(err, plumbing.ErrReferenceNotFound) { return plumbing.ZeroHash, nil } else if err != nil { return plumbing.ZeroHash, fmt.Errorf("traversing reference chain: %w", err) } return ref.Hash(), nil } // headFS returns an FS based on the HEAD commit, or if there is no HEAD commit // (it's an empty repo) an FS based on the raw filesystem. func (r *Repo) headFS() (fs.FS, error) { head, err := r.GetGitHead() if errors.Is(err, ErrHeadIsZero) { bfs, err := r.billyFilesystem() if err != nil { return nil, fmt.Errorf("getting underlying filesystem: %w", err) } return fs.FromBillyFilesystem(bfs), nil } else if err != nil { return nil, fmt.Errorf("could not get HEAD tree: %w", err) } return fs.FromTree(head.GitTree), nil } // GitCommit wraps a single git commit object, and also contains various fields // which are parsed out of it. It is used as a convenience type, in place of // having to manually retrieve and parse specific information out of commit // objects. type GitCommit struct { GitCommit *object.Commit // Fields based on that Commit, which can't be directly gleaned from it. GitTree *object.Tree Commit Commit Interface CommitInterface } // Root returns true if this commit is the root commit in its branch (i.e. it // has no parents) func (gc GitCommit) Root() bool { return gc.GitCommit.NumParents() == 0 } // GetGitCommit retrieves the commit at the given hash, and all of its sub-data // which can be pulled out of it. func (r *Repo) GetGitCommit(h plumbing.Hash) (gc GitCommit, err error) { if gc.GitCommit, err = r.GitRepo.CommitObject(h); err != nil { return gc, fmt.Errorf("getting git commit object: %w", err) } else if gc.GitTree, err = r.GitRepo.TreeObject(gc.GitCommit.TreeHash); err != nil { return gc, fmt.Errorf("getting git tree object %q: %w", gc.GitCommit.TreeHash, err) } else if gc.Commit.UnmarshalText([]byte(gc.GitCommit.Message)); err != nil { return gc, fmt.Errorf("decoding commit message: %w", err) } else if gc.Interface, err = gc.Commit.Interface(); err != nil { return gc, fmt.Errorf("casting %+v to a CommitInterface: %w", gc.Commit, err) } return } // ErrHeadIsZero is used to indicate that HEAD resolves to the zero hash. An // example of when this can happen is if the repo was just initialized and has // no commits, or if an orphan branch is checked out. var ErrHeadIsZero = errors.New("HEAD resolves to the zero hash") // GetGitHead returns the GitCommit which is currently referenced by HEAD. // This method may return ErrHeadIsZero if HEAD resolves to the zero hash. func (r *Repo) GetGitHead() (GitCommit, error) { headHash, err := r.ReferenceToHash(plumbing.HEAD) if err != nil { return GitCommit{}, fmt.Errorf("resolving HEAD: %w", err) } else if headHash == plumbing.ZeroHash { return GitCommit{}, ErrHeadIsZero } gc, err := r.GetGitCommit(headHash) if err != nil { return GitCommit{}, fmt.Errorf("getting commit %q: %w", headHash, err) } return gc, nil } // GetGitCommitRange returns an ancestry of GitCommits, with the first being the // commit immediately following the given starting hash, and the last being the // given ending hash. // // If start is plumbing.ZeroHash then the root commit will be the starting one. func (r *Repo) GetGitCommitRange(start, end plumbing.Hash) ([]GitCommit, error) { curr, err := r.GetGitCommit(end) if err != nil { return nil, fmt.Errorf("retrieving commit %q: %w", end, err) } var commits []GitCommit var found bool for { if found = start != plumbing.ZeroHash && curr.GitCommit.Hash == start; found { break } commits = append(commits, curr) numParents := curr.GitCommit.NumParents() if numParents == 0 { break } else if numParents > 1 { return nil, fmt.Errorf("commit %q has more than one parent: %+v", curr.GitCommit.Hash, curr.GitCommit.ParentHashes) } parentHash := curr.GitCommit.ParentHashes[0] parent, err := r.GetGitCommit(parentHash) if err != nil { return nil, fmt.Errorf("retrieving commit %q: %w", parentHash, err) } curr = parent } if !found && start != plumbing.ZeroHash { return nil, fmt.Errorf("unable to find commit %q as an ancestor of %q", start, end) } // reverse the commits to be in the expected order for l, r := 0, len(commits)-1; l < r; l, r = l+1, r-1 { commits[l], commits[r] = commits[r], commits[l] } return commits, nil } var ( hashStrLen = len(plumbing.ZeroHash.String()) errNotHex = errors.New("not a valid hex string") ) func (r *Repo) findCommitByShortHash(hashStr string) (plumbing.Hash, error) { paddedHashStr := hashStr if len(hashStr)%2 > 0 { paddedHashStr += "0" } if hashB, err := hex.DecodeString(paddedHashStr); err != nil { return plumbing.ZeroHash, errNotHex } else if len(hashStr) == hashStrLen { var hash plumbing.Hash copy(hash[:], hashB) return hash, nil } else if len(hashStr) < 2 { return plumbing.ZeroHash, errors.New("hash string must be 2 characters long or more") } for i := 2; i < hashStrLen; i++ { hashPrefix, hashTail := hashStr[:i], hashStr[i:] path := filepath.Join("objects", hashPrefix) fileInfos, err := r.GitDirFS.ReadDir(path) if err != nil { return plumbing.ZeroHash, fmt.Errorf("listing files in %q: %w", path, err) } var matchedHash plumbing.Hash for _, fileInfo := range fileInfos { objFileName := fileInfo.Name() if !strings.HasPrefix(objFileName, hashTail) { continue } objHash := plumbing.NewHash(hashPrefix + objFileName) obj, err := r.GitRepo.Storer.EncodedObject(plumbing.AnyObject, objHash) if err != nil { return plumbing.ZeroHash, fmt.Errorf("reading object %q off disk: %w", objHash, err) } else if obj.Type() != plumbing.CommitObject { continue } else if matchedHash == plumbing.ZeroHash { matchedHash = objHash continue } return plumbing.ZeroHash, fmt.Errorf("both %q and %q match", matchedHash, objHash) } if matchedHash != plumbing.ZeroHash { return matchedHash, nil } } return plumbing.ZeroHash, errors.New("failed to find a commit object with a matching prefix") } func (r *Repo) resolveRev(rev plumbing.Revision) (plumbing.Hash, error) { if rev == plumbing.Revision(plumbing.ZeroHash.String()) { return plumbing.ZeroHash, nil } { // pretend the revision is a short hash until proven otherwise shortHash := string(rev) hash, err := r.findCommitByShortHash(shortHash) if errors.Is(err, errNotHex) { // ok, continue } else if err != nil { return plumbing.ZeroHash, fmt.Errorf("resolving as short hash: %w", err) } else { // guess it _is_ a short hash, knew it! return hash, nil } } h, err := r.GitRepo.ResolveRevision(rev) if err != nil { return plumbing.ZeroHash, fmt.Errorf("resolving revision %q: %w", rev, err) } return *h, nil } // GetGitRevision resolves the revision and returns the GitCommit it references. func (r *Repo) GetGitRevision(rev plumbing.Revision) (GitCommit, error) { hash, err := r.resolveRev(rev) if err != nil { return GitCommit{}, err } gc, err := r.GetGitCommit(hash) if err != nil { return GitCommit{}, fmt.Errorf("getting commit %q: %w", hash, err) } return gc, nil } // GetGitRevisionRange is like GetGitCommitRange, first resolving the given // revisions into hashes before continuing with GetGitCommitRange's behavior. func (r *Repo) GetGitRevisionRange(startRev, endRev plumbing.Revision) ([]GitCommit, error) { start, err := r.resolveRev(startRev) if err != nil { return nil, err } end, err := r.resolveRev(endRev) if err != nil { return nil, err } return r.GetGitCommitRange(start, end) }