A read-only clone of the dehub project, for until dehub.dev can be brought back online.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
dehub/commit.go

610 lines
19 KiB

package dehub
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"reflect"
"sort"
"strings"
"time"
"dehub.dev/src/dehub.git/accessctl"
"dehub.dev/src/dehub.git/fs"
"dehub.dev/src/dehub.git/sigcred"
"dehub.dev/src/dehub.git/typeobj"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
yaml "gopkg.in/yaml.v2"
)
// CommitInterface describes the methods which must be implemented by the
// different commit types. None of the methods should modify the underlying
// object.
type CommitInterface interface {
// MessageHead returns the head of the commit message (i.e. the first line).
// The CommitCommon of the outer Commit is passed in for added context, if
// necessary.
MessageHead(CommitCommon) (string, error)
// ExpectedHash returns the raw hash which Signifiers can sign to accredit
// this commit. The ChangedFile objects given describe the file changes
// between the parent commit and this commit.
ExpectedHash([]ChangedFile) ([]byte, error)
// StoredHash returns the signable Hash embedded in the commit, which should
// hopefully correspond to the ExpectedHash.
StoredHash() []byte
}
// CommitCommon describes the fields common to all Commit objects.
type CommitCommon struct {
// Credentials represent all created Credentials for this commit, and can be
// set on all Commit objects regardless of other fields being set.
Credentials []sigcred.Credential `yaml:"credentials"`
}
func (cc CommitCommon) credIDs() []string {
m := map[string]struct{}{}
for _, cred := range cc.Credentials {
if cred.AccountID != "" {
m[cred.AccountID] = struct{}{}
} else if cred.AnonID != "" {
m[cred.AnonID] = struct{}{}
}
}
s := make([]string, 0, len(m))
for id := range m {
s = append(s, id)
}
sort.Strings(s)
return s
}
func abbrevCommitMessage(msg string) string {
i := strings.Index(msg, "\n")
if i > 0 {
msg = msg[:i]
}
if len(msg) > 80 {
msg = msg[:80] + "..."
}
return msg
}
// Commit represents a single Commit which is being added to a branch. Only one
// field should be set on a Commit, unless otherwise noted.
type Commit struct {
Change *CommitChange `type:"change,default"`
Credential *CommitCredential `type:"credential"`
Comment *CommitComment `type:"comment"`
Common CommitCommon `yaml:",inline"`
}
// MarshalYAML implements the yaml.Marshaler interface.
func (c Commit) MarshalYAML() (interface{}, error) {
return typeobj.MarshalYAML(c)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *Commit) UnmarshalYAML(unmarshal func(interface{}) error) error {
return typeobj.UnmarshalYAML(c, unmarshal)
}
// Interface returns the CommitInterface instance encapsulated by this Commit
// object.
func (c Commit) Interface() (CommitInterface, error) {
el, _, err := typeobj.Element(c)
if err != nil {
return nil, err
}
return el.(CommitInterface), nil
}
// Type returns the Commit's type (as would be used in its YAML "type" field).
func (c Commit) Type() (string, error) {
_, typeStr, err := typeobj.Element(c)
if err != nil {
return "", err
}
return typeStr, nil
}
// MarshalText implements the encoding.TextMarshaler interface by returning the
// form the Commit object takes in the git commit message.
func (c Commit) MarshalText() ([]byte, error) {
commitInt, err := c.Interface()
if err != nil {
return nil, fmt.Errorf("could not cast Commit %+v to interface : %w", c, err)
}
msgHead, err := commitInt.MessageHead(c.Common)
if err != nil {
return nil, fmt.Errorf("error constructing message head: %w", err)
}
msgBodyB, err := yaml.Marshal(c)
if err != nil {
return nil, fmt.Errorf("error marshaling commit %+v as yaml: %w", c, err)
}
w := new(bytes.Buffer)
w.WriteString(msgHead)
w.WriteString("\n\n---\n")
w.Write(msgBodyB)
return w.Bytes(), nil
}
// UnmarshalText implements the encoding.TextUnmarshaler interface by decoding a
// Commit object which has been encoded into a git commit message.
func (c *Commit) UnmarshalText(msg []byte) error {
i := bytes.Index(msg, []byte("\n"))
if i < 0 {
return fmt.Errorf("commit message %q is malformed, it has no body", msg)
}
msgBody := msg[i:]
if err := yaml.Unmarshal(msgBody, c); err != nil {
return fmt.Errorf("could not unmarshal Commit message from yaml: %w", err)
} else if reflect.DeepEqual(*c, Commit{}) {
// a basic check, but worthwhile
return errors.New("commit message is malformed, could not unmarshal yaml object")
}
return nil
}
// AccreditCommit returns the given Commit with an appended Credential provided
// by the given SignifierInterface.
func (r *Repo) AccreditCommit(commit Commit, sigInt sigcred.SignifierInterface) (Commit, error) {
commitInt, err := commit.Interface()
if err != nil {
return commit, fmt.Errorf("could not cast commit %+v to interface: %w", commit, err)
}
headFS, err := r.headFS()
if err != nil {
return commit, fmt.Errorf("could not grab snapshot of HEAD fs: %w", err)
}
cred, err := sigInt.Sign(headFS, commitInt.StoredHash())
if err != nil {
return commit, fmt.Errorf("could not accredit change commit: %w", err)
}
commit.Common.Credentials = append(commit.Common.Credentials, cred)
return commit, nil
}
// CommitBareParams are the parameters to the CommitBare method. All are
// required, unless otherwise noted.
type CommitBareParams struct {
Commit Commit
Author string
ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q)
GitTree *object.Tree
}
// CommitBare constructs a git commit object and and stores it, returning the
// resulting GitCommit. This method does not interact with HEAD at all.
func (r *Repo) CommitBare(params CommitBareParams) (GitCommit, error) {
msgB, err := params.Commit.MarshalText()
if err != nil {
return GitCommit{}, fmt.Errorf("encoding %T to message string: %w",
params.Commit, err)
}
author := object.Signature{
Name: params.Author,
When: time.Now(),
}
commit := &object.Commit{
Author: author,
Committer: author,
Message: string(msgB),
TreeHash: params.GitTree.Hash,
}
if params.ParentHash != plumbing.ZeroHash {
commit.ParentHashes = []plumbing.Hash{params.ParentHash}
}
commitObj := r.GitRepo.Storer.NewEncodedObject()
if err := commit.Encode(commitObj); err != nil {
return GitCommit{}, fmt.Errorf("encoding commit object: %w", err)
}
commitHash, err := r.GitRepo.Storer.SetEncodedObject(commitObj)
if err != nil {
return GitCommit{}, fmt.Errorf("setting encoded object: %w", err)
}
return r.GetGitCommit(commitHash)
}
// Commit uses the given Commit to create a git commit object and commits it to
// the current HEAD, returning the full GitCommit.
func (r *Repo) Commit(commit Commit) (GitCommit, error) {
headRef, err := r.TraverseReferenceChain(plumbing.HEAD, func(ref *plumbing.Reference) bool {
return ref.Type() == plumbing.HashReference
})
if err != nil {
return GitCommit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err)
}
headRefName := headRef.Name()
headHash, err := r.ReferenceToHash(headRefName)
if err != nil {
return GitCommit{}, fmt.Errorf("resolving ref %q (HEAD): %w", headRefName, err)
}
// TODO this is also used in the same way in NewCommitChange. It might make
// sense to refactor this logic out, it might not be needed in fs at all.
_, stagedTree, err := fs.FromStagedChangesTree(r.GitRepo)
if err != nil {
return GitCommit{}, fmt.Errorf("getting staged changes: %w", err)
}
gitCommit, err := r.CommitBare(CommitBareParams{
Commit: commit,
Author: strings.Join(commit.Common.credIDs(), ", "),
ParentHash: headHash,
GitTree: stagedTree,
})
if err != nil {
return GitCommit{}, err
}
// now set the branch to this new commit
newHeadRef := plumbing.NewHashReference(headRefName, gitCommit.GitCommit.Hash)
if err := r.GitRepo.Storer.SetReference(newHeadRef); err != nil {
return GitCommit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w",
headRefName, gitCommit.GitCommit.Hash, err)
}
return gitCommit, nil
}
// HasStagedChanges returns true if there are file changes which have been
// staged (e.g. via "git add").
func (r *Repo) HasStagedChanges() (bool, error) {
w, err := r.GitRepo.Worktree()
if err != nil {
return false, fmt.Errorf("error retrieving worktree: %w", err)
}
status, err := w.Status()
if err != nil {
return false, fmt.Errorf("error retrieving worktree status: %w", err)
}
var any bool
for _, fileStatus := range status {
if fileStatus.Staging != git.Unmodified &&
fileStatus.Staging != git.Untracked {
any = true
break
}
}
return any, nil
}
// VerifyCommits verifies that the given commits, which are presumably on the
// given branch, are gucci.
func (r *Repo) VerifyCommits(branchName plumbing.ReferenceName, gitCommits []GitCommit) error {
// this isn't strictly necessary for this method, but it helps discover bugs
// in other parts of the code.
if len(gitCommits) == 0 {
return errors.New("cannot call VerifyCommits with empty commit slice")
}
// First determine the root of the main branch. All commits need to be an
// ancestor of it. If the main branch has not been created yet then there
// might not be a root commit yet.
var rootCommit *object.Commit
mainGitCommit, err := r.GetGitRevision(plumbing.Revision(MainRefName))
if errors.Is(err, plumbing.ErrReferenceNotFound) {
// main branch hasn't been created yet. The commits can only be verified
// if they are for the main branch and they include the root commit.
if branchName != MainRefName {
return fmt.Errorf("cannot verify commits in branch %q when no main branch exists", branchName)
}
for _, gitCommit := range gitCommits {
if gitCommit.GitCommit.NumParents() == 0 {
rootCommit = gitCommit.GitCommit
break
}
}
if rootCommit == nil {
return errors.New("root commit of main branch cannot be determined")
}
} else if err != nil {
return fmt.Errorf("retrieving commit at HEAD of %q: %w", MainRefName.Short(), err)
} else {
rootCommit = mainGitCommit.GitCommit
for {
if rootCommit.NumParents() == 0 {
break
} else if rootCommit.NumParents() > 1 {
return fmt.Errorf("commit %q in main branch has more than one parent", rootCommit.Hash)
} else if rootCommit, err = rootCommit.Parent(0); err != nil {
return fmt.Errorf("retrieving parent commit of %q: %w", rootCommit.Hash, err)
}
}
}
// We also need the HEAD of the given branch, if it exists.
branchGitCommit, err := r.GetGitRevision(plumbing.Revision(branchName))
if err != nil && !errors.Is(err, plumbing.ErrReferenceNotFound) {
return fmt.Errorf("retrieving commit at HEAD of %q: %w", branchName.Short(), err)
}
for i, gitCommit := range gitCommits {
// It's not a requirement that the given GitCommits are in ancestral
// order, but usually they are; if the previous commit is the parent of
// this one we can skip a bunch of work.
var parentTree *object.Tree
var isNonFF bool
if i > 0 && gitCommits[i-1].GitCommit.Hash == gitCommit.GitCommit.ParentHashes[0] {
parentTree = gitCommits[i-1].GitTree
} else if gitCommit.GitCommit.Hash == rootCommit.Hash {
// looking at the root commit, assume it's ok
} else {
var err error
isAncestor := func(older, younger *object.Commit) bool {
var isAncestor bool
if err != nil {
return false
} else if isAncestor, err = older.IsAncestor(younger); err != nil {
err = fmt.Errorf("determining if %q is an ancestor of %q: %w",
younger.Hash, older.Hash, err)
return false
}
return isAncestor
}
ancestorOfRoot := isAncestor(rootCommit, gitCommit.GitCommit)
if branchGitCommit.GitCommit != nil {
// if the branch doesn't actually exist then this couldn't
// possibly be a nonFF
isNonFF = !isAncestor(branchGitCommit.GitCommit, gitCommit.GitCommit)
}
if err != nil {
return err
} else if !ancestorOfRoot {
return fmt.Errorf("commit %q must be direct descendant of root commit of %q (%q)",
gitCommit.GitCommit.Hash, MainRefName.Short(), rootCommit.Hash,
)
}
}
if err := r.verifyCommit(branchName, gitCommit, parentTree, isNonFF); err != nil {
return fmt.Errorf("verifying commit %q: %w",
gitCommit.GitCommit.Hash, err)
}
}
return nil
}
// parentTree returns the tree of the parent commit of the given commit. If the
// given commit has no parents then a bare tree is returned.
func (r *Repo) parentTree(commitObj *object.Commit) (*object.Tree, error) {
switch commitObj.NumParents() {
case 0:
return new(object.Tree), nil
case 1:
if parentCommitObj, err := commitObj.Parent(0); err != nil {
return nil, fmt.Errorf("getting parent commit %q: %w",
commitObj.ParentHashes[0], err)
} else if parentTree, err := r.GitRepo.TreeObject(parentCommitObj.TreeHash); err != nil {
return nil, fmt.Errorf("getting parent tree object %q: %w",
parentCommitObj.TreeHash, err)
} else {
return parentTree, nil
}
default:
return nil, errors.New("commit has multiple parents")
}
}
// if parentTree is nil then it will be inferred.
func (r *Repo) verifyCommit(
branchName plumbing.ReferenceName,
gitCommit GitCommit,
parentTree *object.Tree,
isNonFF bool,
) error {
parentTree, err := r.parentTree(gitCommit.GitCommit)
if err != nil {
return fmt.Errorf("retrieving parent tree of commit: %w", err)
}
var sigFS fs.FS
if gitCommit.Root() {
sigFS = fs.FromTree(gitCommit.GitTree)
} else {
sigFS = fs.FromTree(parentTree)
}
cfg, err := r.loadConfig(sigFS)
if err != nil {
return fmt.Errorf("loading config of parent %q: %w",
gitCommit.GitCommit.ParentHashes[0], err)
}
// assert access controls
changedFiles, err := ChangedFilesBetweenTrees(parentTree, gitCommit.GitTree)
if err != nil {
return fmt.Errorf("calculating diff from tree %q to tree %q: %w",
parentTree.Hash, gitCommit.GitTree.Hash, err)
} else if len(changedFiles) > 0 && gitCommit.Commit.Change == nil {
return errors.New("files changes but commit is not a change commit")
}
pathsChanged := make([]string, len(changedFiles))
for i := range changedFiles {
pathsChanged[i] = changedFiles[i].Path
}
commitType, err := gitCommit.Commit.Type()
if err != nil {
return fmt.Errorf("determining type of commit %+v: %w", gitCommit.Commit, err)
}
err = accessctl.AssertCanCommit(cfg.AccessControls, accessctl.CommitRequest{
Type: commitType,
Branch: branchName.Short(),
Credentials: gitCommit.Commit.Common.Credentials,
FilesChanged: pathsChanged,
NonFastForward: isNonFF,
})
if err != nil {
return fmt.Errorf("asserting access controls: %w", err)
}
// ensure the hash is what it's expected to be
storedCommitHash := gitCommit.Interface.StoredHash()
expectedCommitHash, err := gitCommit.Interface.ExpectedHash(changedFiles)
if err != nil {
return fmt.Errorf("calculating expected commit hash: %w", err)
} else if !bytes.Equal(storedCommitHash, expectedCommitHash) {
return fmt.Errorf("unexpected hash in commit body, is %s but should be %s",
base64.StdEncoding.EncodeToString(storedCommitHash),
base64.StdEncoding.EncodeToString(expectedCommitHash))
}
// verify all credentials
for _, cred := range gitCommit.Commit.Common.Credentials {
if cred.AccountID == "" {
if err := cred.SelfVerify(expectedCommitHash); err != nil {
return fmt.Errorf("verifying credential %+v: %w", cred, err)
}
} else {
sig, err := r.signifierForCredential(sigFS, cred)
if err != nil {
return fmt.Errorf("finding signifier for credential %+v: %w", cred, err)
} else if err := sig.Verify(sigFS, expectedCommitHash, cred); err != nil {
return fmt.Errorf("verifying credential %+v: %w", cred, err)
}
}
}
return nil
}
type changeRangeInfo struct {
changeCommits []GitCommit
authors map[string]struct{}
msg string
startTree, endTree *object.Tree
changeHash []byte
}
// changeRangeInfo returns various pieces of information about a range of
// commits' changes.
func (r *Repo) changeRangeInfo(commits []GitCommit) (changeRangeInfo, error) {
info := changeRangeInfo{
authors: map[string]struct{}{},
}
for _, commit := range commits {
if _, ok := commit.Interface.(*CommitChange); ok {
info.changeCommits = append(info.changeCommits, commit)
for _, cred := range commit.Commit.Common.Credentials {
info.authors[cred.AccountID] = struct{}{}
}
}
}
if len(info.changeCommits) == 0 {
return changeRangeInfo{}, errors.New("no change commits found")
}
// startTree has to be the tree of the parent of the first commit, which
// isn't included in commits. Determine it the hard way.
var err error
if info.startTree, err = r.parentTree(commits[0].GitCommit); err != nil {
return changeRangeInfo{}, fmt.Errorf("getting tree of parent of %q: %w",
commits[0].GitCommit.Hash, err)
}
lastChangeCommit := info.changeCommits[len(info.changeCommits)-1]
info.msg = lastChangeCommit.Commit.Change.Message
info.endTree = lastChangeCommit.GitTree
changedFiles, err := ChangedFilesBetweenTrees(info.startTree, info.endTree)
if err != nil {
return changeRangeInfo{}, fmt.Errorf("calculating diff of commit trees %q and %q: %w",
info.startTree.Hash, info.endTree.Hash, err)
}
info.changeHash = genChangeHash(nil, info.msg, changedFiles)
return info, nil
}
// VerifyCanSetBranchHEADTo is used to verify that a branch's HEAD can be set to
// the given hash. It verifies any new commits which are being added, and
// handles verifying non-fast-forward commits as well.
//
// If the given hash matches the current HEAD of the branch then this performs
// no further checks and returns nil.
func (r *Repo) VerifyCanSetBranchHEADTo(branchName plumbing.ReferenceName, hash plumbing.Hash) error {
oldCommitRef, err := r.GitRepo.Reference(branchName, true)
if errors.Is(err, plumbing.ErrReferenceNotFound) {
// if the branch is being created then just pull all of its commits and
// verify them.
// TODO optimize this so that it tries to use the merge-base with main,
// so we're not re-verifying a ton of commits unecessarily
commits, err := r.GetGitCommitRange(plumbing.ZeroHash, hash)
if err != nil {
return fmt.Errorf("retrieving %q and all its ancestors: %w", hash, err)
}
return r.VerifyCommits(branchName, commits)
} else if err != nil {
return fmt.Errorf("resolving branch reference to a hash: %w", err)
} else if oldCommitRef.Hash() == hash {
// if the HEAD is already at the given hash then it must be fine.
return nil
}
oldCommitObj, err := r.GitRepo.CommitObject(oldCommitRef.Hash())
if err != nil {
return fmt.Errorf("retrieving commit object %q: %w", oldCommitRef.Hash(), err)
}
newCommitObj, err := r.GitRepo.CommitObject(hash)
if err != nil {
return fmt.Errorf("retrieving commit object %q: %w", hash, err)
}
mbCommits, err := oldCommitObj.MergeBase(newCommitObj)
if err != nil {
return fmt.Errorf("determining merge-base between %q and %q: %w",
oldCommitObj.Hash, newCommitObj.Hash, err)
} else if len(mbCommits) == 0 {
return fmt.Errorf("%q and %q have no ancestors in common",
oldCommitObj.Hash, newCommitObj.Hash)
} else if len(mbCommits) == 2 {
return fmt.Errorf("%q and %q have more than one ancestor in common",
oldCommitObj.Hash, newCommitObj.Hash)
}
commits, err := r.GetGitCommitRange(mbCommits[0].Hash, hash)
if err != nil {
return fmt.Errorf("retrieving commits %q to %q: %w", mbCommits[0].Hash, hash, err)
}
return r.VerifyCommits(branchName, commits)
}