From 1c2bc11fc30ace6cb9c0617c4d870c53800f0661 Mon Sep 17 00:00:00 2001 From: mediocregopher <> Date: Sun, 22 Mar 2020 17:54:25 -0600 Subject: [PATCH] refactor how commits are created, as well as how reference following is done --- type: change message: |- refactor how commits are created, as well as how reference following is done In light of the upcoming combine commit feature, it will be necessary to have more fine-grained control over how commits are created and stored. This change attempts to address that, by adding a CommitBare method, which the Commit method then uses. CommitBare merely creates and stores a commit, without updating HEAD. Since Commit now has to figure out the parent commit of the HEAD, and deal with the case of HEAD not being set (a new repo, or an orphan branch), the logic for reference traversal had to be refactored some as well. change_hash: AKwHmb4T5APrl3EL7Ox27ABGWrjqjlOGvBD1cOycgPcN credentials: - type: pgp_signature pub_key_id: 95C46FA6A41148AC body: iQIzBAABAgAdFiEEJ6tQKp6olvZKJ0lwlcRvpqQRSKwFAl53+qwACgkQlcRvpqQRSKzXuQ/9HBnrwB3zYSsLsQrNTlZ6MW5DaH/y1pypba6eD57fC6Tl3ipuDvTLrjiRMdPkS8LFvPOkT8FUFRZH7farBZBZGRnoAL88ELPB/h5Ip7FaUu+syfuTsHnBvWPs2hPN/sEhceIy+io3P03vIKSYCPMAhhigkWhoZZa+SJNQyTTJ/WzbGmuopCYhe3/4JDfpLNIvfOzJb40jVXqmqZ4Lf82uqf9DIh8LjNRhhOBeVFOVyQ4FOHh6a/T7D4qNSMPi/RRFg9VSXlNV5tp2ykdQdzyS6vdBBxV5j8pz8DyTDZYxlqkHyb5Blkl4vcndoJOYjyacVqVuXMgvjf0a8cmuhDEmC1vRDepx4jKZQwKyl1HQ9X0+KodMAxrx671+m7Zga9OcyCdNdyaH+Tl2W4c6apaEv2lcx5DalSLW2uoDbo5B4uutBOFrL12ML2tnnyF93tRSZkcNibo69/88qjFjaYEAjOhupgNtrbU+92Dsj/8UGYKFQgfx7DSHGXhs8fzphzPvZifWoiGV49iXvOauNlUzolX3h4bmxcXAnl/+tC4F1a19TGl7TG7HAC8kajXAf01pSD+WxJGMN+gerOYHOCrlQVx+sWhQBh22yfsaAJXPLl4R/2bUKqSTXjHHp3/WTqK4HdRRN/L728YhOxBCy6Ue0CjAZKpi+cQWQUUxpCKQiXs= account: mediocregopher --- cmd/dehub/cmd_verify.go | 4 +- commit.go | 132 ++++++++++++++++++++++++++++++---------- commit_change.go | 2 +- repo.go | 92 ++++++++++++++++++---------- repo_test.go | 6 +- 5 files changed, 168 insertions(+), 68 deletions(-) diff --git a/cmd/dehub/cmd_verify.go b/cmd/dehub/cmd_verify.go index 30a7f49..bdd416d 100644 --- a/cmd/dehub/cmd_verify.go +++ b/cmd/dehub/cmd_verify.go @@ -25,8 +25,8 @@ func cmdVerify(ctx context.Context, cmd *dcmd.Cmd) { var branchName plumbing.ReferenceName if *branch == "" { - if branchName, err = repo.CheckedOutBranch(); err != nil { - return nil, fmt.Errorf("could not determined currently checked out branch: %w", err) + if branchName, err = repo.ReferenceToBranchName(plumbing.HEAD); err != nil { + return nil, fmt.Errorf("determining branch at HEAD: %w", err) } } else { branchName = plumbing.NewBranchReferenceName(*branch) diff --git a/commit.go b/commit.go index 7808d95..e853e35 100644 --- a/commit.go +++ b/commit.go @@ -6,7 +6,6 @@ import ( "dehub/fs" "dehub/sigcred" "dehub/typeobj" - "encoding" "encoding/base64" "errors" "fmt" @@ -177,34 +176,93 @@ func (r *Repo) AccreditCommit(commit Commit, sigInt sigcred.SignifierInterface) return commit, nil } -// Commit uses the given TextMarshaler to create a git commit object (with the -// specified accountID as the author) and commits it to the current HEAD, -// returning the hash of the commit. -func (r *Repo) Commit(m encoding.TextMarshaler, accountID string) (GitCommit, error) { - msgB, err := m.MarshalText() +// CommitBareParams are the parameters to the CommitBare method. All are +// required, unless otherwise noted. +type CommitBareParams struct { + Commit Commit + AccountID string // used as the author + ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q) + GitTree *object.Tree +} + +// CommitBare constructs a git commit object and and stores it, returning the +// resulting GitCommit. This method does not interact with HEAD at all. +func (r *Repo) CommitBare(params CommitBareParams) (GitCommit, error) { + msgB, err := params.Commit.MarshalText() if err != nil { - return GitCommit{}, fmt.Errorf("encoding %T to message string: %v", m, err) + return GitCommit{}, fmt.Errorf("encoding %T to message string: %w", + params.Commit, err) } - w, err := r.GitRepo.Worktree() + author := object.Signature{ + Name: params.AccountID, + When: time.Now(), + } + commit := &object.Commit{ + Author: author, + Committer: author, + Message: string(msgB), + TreeHash: params.GitTree.Hash, + } + if params.ParentHash != plumbing.ZeroHash { + commit.ParentHashes = []plumbing.Hash{params.ParentHash} + } + + commitObj := r.GitRepo.Storer.NewEncodedObject() + if err := commit.Encode(commitObj); err != nil { + return GitCommit{}, fmt.Errorf("encoding commit object: %w", err) + } + + commitHash, err := r.GitRepo.Storer.SetEncodedObject(commitObj) if err != nil { - return GitCommit{}, fmt.Errorf("getting git worktree: %w", err) + return GitCommit{}, fmt.Errorf("setting encoded object: %w", err) } - h, err := w.Commit(string(msgB), &git.CommitOptions{ - Author: &object.Signature{ - Name: accountID, - When: time.Now(), - }, + + return r.GetGitCommit(commitHash) +} + +// Commit uses the given Commit to create a git commit object (with the +// specified accountID as the author) and commits it to the current HEAD, +// returning the full GitCommit. +func (r *Repo) Commit(commit Commit, accountID string) (GitCommit, error) { + headRef, err := r.TraverseReferenceChain(plumbing.HEAD, func(ref *plumbing.Reference) bool { + return ref.Type() == plumbing.HashReference }) if err != nil { - return GitCommit{}, fmt.Errorf("committing to git worktree: %w", err) + return GitCommit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err) + } + headRefName := headRef.Name() + + headHash, err := r.ReferenceToHash(headRefName) + if err != nil { + return GitCommit{}, fmt.Errorf("resolving ref %q (HEAD): %w", headRefName, err) + } + + // TODO this is also used in the same way in NewCommitChange. It might make + // sense to refactor this logic out, it might not be needed in fs at all. + _, stagedTree, err := fs.FromStagedChangesTree(r.GitRepo) + if err != nil { + return GitCommit{}, fmt.Errorf("getting staged changes: %w", err) } - gc, err := r.GetGitCommit(h) + gitCommit, err := r.CommitBare(CommitBareParams{ + Commit: commit, + AccountID: accountID, + ParentHash: headHash, + GitTree: stagedTree, + }) if err != nil { - return GitCommit{}, fmt.Errorf("retrieving fresh commit %q back from git: %w", h, err) + return GitCommit{}, err + } + + // now set the branch to this new commit + newHeadRef := plumbing.NewHashReference(headRefName, gitCommit.GitCommit.Hash) + if err := r.GitRepo.Storer.SetReference(newHeadRef); err != nil { + return GitCommit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w", + headRefName, gitCommit.GitCommit.Hash, err) } - return gc, nil + + return gitCommit, nil } // HasStagedChanges returns true if there are file changes which have been @@ -308,20 +366,32 @@ func (r *Repo) VerifyCommits(branch plumbing.ReferenceName, gitCommits []GitComm return nil } +// parentTree returns the tree of the parent commit of the given commit. If the +// given commit has no parents then a bare tree is returned. +func (r *Repo) parentTree(commitObj *object.Commit) (*object.Tree, error) { + switch commitObj.NumParents() { + case 0: + return new(object.Tree), nil + case 1: + if parentCommitObj, err := commitObj.Parent(0); err != nil { + return nil, fmt.Errorf("getting parent commit %q: %w", + commitObj.ParentHashes[0], err) + } else if parentTree, err := r.GitRepo.TreeObject(parentCommitObj.TreeHash); err != nil { + return nil, fmt.Errorf("getting parent tree object %q: %w", + parentCommitObj.TreeHash, err) + } else { + return parentTree, nil + } + default: + return nil, errors.New("commit has multiple parents") + } +} + // if parentTree is nil then it will be inferred. func (r *Repo) verifyCommit(branch plumbing.ReferenceName, gitCommit GitCommit, parentTree *object.Tree) error { - isRoot := gitCommit.Root() - - if parentTree == nil { - if isRoot { - parentTree = new(object.Tree) - } else if parentCommit, err := gitCommit.GitCommit.Parent(0); err != nil { - return fmt.Errorf("getting parent commit %q: %w", - gitCommit.GitCommit.ParentHashes[0], err) - } else if parentTree, err = r.GitRepo.TreeObject(parentCommit.TreeHash); err != nil { - return fmt.Errorf("getting parent tree object %q: %w", - parentCommit.TreeHash, err) - } + parentTree, err := r.parentTree(gitCommit.GitCommit) + if err != nil { + return fmt.Errorf("retrieving parent tree of commit: %w", err) } vctx := verificationCtx{ @@ -331,7 +401,7 @@ func (r *Repo) verifyCommit(branch plumbing.ReferenceName, gitCommit GitCommit, } var sigFS fs.FS - if isRoot { + if gitCommit.Root() { sigFS = fs.FromTree(vctx.commitTree) } else { sigFS = fs.FromTree(vctx.parentTree) diff --git a/commit_change.go b/commit_change.go index 341639c..e66d3ab 100644 --- a/commit_change.go +++ b/commit_change.go @@ -22,7 +22,7 @@ var _ CommitInterface = CommitChange{} // returned Commit will _not_ be filled in. func (r *Repo) NewCommitChange(msg string) (Commit, error) { headTree := new(object.Tree) - if head, err := r.GetGitHead(); err != nil && !errors.Is(err, ErrNoHead) { + if head, err := r.GetGitHead(); err != nil && !errors.Is(err, ErrHeadIsZero) { return Commit{}, fmt.Errorf("getting HEAD commit: %w", err) } else if err == nil { headTree = head.GitTree diff --git a/repo.go b/repo.go index 80c1659..a6d6920 100644 --- a/repo.go +++ b/repo.go @@ -93,8 +93,8 @@ func InitMemRepo() *Repo { } func (r *Repo) init() error { - h := plumbing.NewSymbolicReference(plumbing.HEAD, MainRefName) - if err := r.GitRepo.Storer.SetReference(h); err != nil { + headRef := plumbing.NewSymbolicReference(plumbing.HEAD, MainRefName) + if err := r.GitRepo.Storer.SetReference(headRef); err != nil { return fmt.Errorf("setting HEAD reference to %q: %w", MainRefName, err) } return nil @@ -108,40 +108,70 @@ func (r *Repo) billyFilesystem() (billy.Filesystem, error) { return w.Filesystem, nil } -// CheckedOutBranch returns the name of the currently checked out branch. -func (r *Repo) CheckedOutBranch() (plumbing.ReferenceName, error) { - // Head() can't be used for this, because it doesn't handle the case of a - // newly initialized repo very well. - ogRef, err := r.GitRepo.Storer.Reference(plumbing.HEAD) - if err != nil { - return "", fmt.Errorf("de-referencing HEAD (is it a bare repo?): %w", err) - } +var errTraverseRefNoMatch = errors.New("failed to find reference matching given predicate") - ref := ogRef +// TraverseReferenceChain resolves a chain of references, calling the given +// predicate on each one, and returning the first one for which the predicate +// returns true. This method will return an error if it reaches the end of the +// chain and the predicate still has not returned true. +// +// If a reference name is encountered which does not actually exist, then it is +// assumed to be a hash reference to the zero hash. +func (r *Repo) TraverseReferenceChain(refName plumbing.ReferenceName, pred func(*plumbing.Reference) bool) (*plumbing.Reference, error) { + // TODO infinite loop checking for { - if ref.Type() != plumbing.SymbolicReference { - break + ref, err := r.GitRepo.Storer.Reference(refName) + if errors.Is(err, plumbing.ErrReferenceNotFound) { + ref = plumbing.NewHashReference(refName, plumbing.ZeroHash) + } else if err != nil { + return nil, fmt.Errorf("resolving reference %q: %w", refName, err) } - target := ref.Target() - if target.IsBranch() { - return target, nil + if pred(ref) { + return ref, nil + } else if ref.Type() != plumbing.SymbolicReference { + return nil, errTraverseRefNoMatch } + refName = ref.Target() + } +} - ref, err = r.GitRepo.Storer.Reference(target) - if err != nil { - break - } +// ReferenceToBranchName traverses a chain of references looking for a branch +// reference, and returns that name, or returns an error if no branch reference +// is part of the chain. +func (r *Repo) ReferenceToBranchName(refName plumbing.ReferenceName) (plumbing.ReferenceName, error) { + ref, err := r.TraverseReferenceChain(refName, func(ref *plumbing.Reference) bool { + return ref.Target().IsBranch() + }) + if errors.Is(err, errTraverseRefNoMatch) { + return "", errors.New("no branch in reference chain") + } else if err != nil { + return "", fmt.Errorf("traversing reference chain: %w", err) } + return ref.Target(), nil +} - return "", fmt.Errorf("could not de-reference HEAD to a branch: %w", err) +// ReferenceToHash fully resolves a reference to a hash. If a reference cannot +// be resolved then plumbing.ZeroHash is returned. +func (r *Repo) ReferenceToHash(refName plumbing.ReferenceName) (plumbing.Hash, error) { + ref, err := r.TraverseReferenceChain(refName, func(ref *plumbing.Reference) bool { + return ref.Type() == plumbing.HashReference + }) + if errors.Is(err, errTraverseRefNoMatch) { + return plumbing.ZeroHash, errors.New("no hash in reference chain (is this even possible???)") + } else if errors.Is(err, plumbing.ErrReferenceNotFound) { + return plumbing.ZeroHash, nil + } else if err != nil { + return plumbing.ZeroHash, fmt.Errorf("traversing reference chain: %w", err) + } + return ref.Hash(), nil } // headFS returns an FS based on the HEAD commit, or if there is no HEAD commit // (it's an empty repo) an FS based on the raw filesystem. func (r *Repo) headFS() (fs.FS, error) { head, err := r.GetGitHead() - if errors.Is(err, ErrNoHead) { + if errors.Is(err, ErrHeadIsZero) { bfs, err := r.billyFilesystem() if err != nil { return nil, fmt.Errorf("getting underlying filesystem: %w", err) @@ -205,20 +235,20 @@ func (r *Repo) GetGitRevision(rev plumbing.Revision) (GitCommit, error) { return gc, nil } -// ErrNoHead is returns from GetGitHead if there is no HEAD reference defined in -// the repo. This can happen if the repo has no commits -var ErrNoHead = errors.New("HEAD reference not found") +// ErrHeadIsZero is used to indicate that HEAD resolves to the zero hash. An +// example of when this can happen is if the repo was just initialized and has +// no commits, or if an orphan branch is checked out. +var ErrHeadIsZero = errors.New("HEAD resolves to the zero hash") // GetGitHead returns the GitCommit which is currently referenced by HEAD. -// This method may return ErrNoHead if the repo has no commits. +// This method may return ErrHeadIsZero if HEAD resolves to the zero hash. func (r *Repo) GetGitHead() (GitCommit, error) { - head, err := r.GitRepo.Head() - if errors.Is(err, plumbing.ErrReferenceNotFound) { - return GitCommit{}, ErrNoHead - } else if err != nil { + headHash, err := r.ReferenceToHash(plumbing.HEAD) + if err != nil { return GitCommit{}, fmt.Errorf("resolving HEAD: %w", err) + } else if headHash == plumbing.ZeroHash { + return GitCommit{}, ErrHeadIsZero } - headHash := head.Hash() gc, err := r.GetGitCommit(headHash) if err != nil { diff --git a/repo_test.go b/repo_test.go index c0759af..3c855f7 100644 --- a/repo_test.go +++ b/repo_test.go @@ -162,7 +162,7 @@ func (h *harness) tryCommit( h.t.Fatalf("failed to commit ChangeCommit: %v", err) } - branch, err := h.repo.CheckedOutBranch() + branch, err := h.repo.ReferenceToBranchName(plumbing.HEAD) if err != nil { h.t.Fatalf("determining checked out branch: %v", err) } @@ -177,7 +177,7 @@ func (h *harness) tryCommit( } if gitCommit.GitCommit.NumParents() == 0 { - h.t.Fatalf("unverifiable commit %q has no parents, but it should", gitCommit.GitCommit.NumParents()) + h.t.Fatalf("unverifiable commit %q has no parents, but it should", gitCommit.GitCommit.Hash) } h.reset(gitCommit.GitCommit.ParentHashes[0], git.HardReset) @@ -289,7 +289,7 @@ func TestThisRepoStillVerifies(t *testing.T) { headGitCommit.GitCommit.Hash, err) } - checkedOutBranch, err := repo.CheckedOutBranch() + checkedOutBranch, err := repo.ReferenceToBranchName(plumbing.HEAD) if err != nil { t.Fatalf("error determining checked out branch: %v", err) }