A read-only clone of the dehub project, for until dehub.dev can be brought back online.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
dehub/payload.go

628 lines
20 KiB

package dehub
import (
"bytes"
"errors"
"fmt"
"sort"
"strings"
"time"
"dehub.dev/src/dehub.git/accessctl"
"dehub.dev/src/dehub.git/fs"
"dehub.dev/src/dehub.git/sigcred"
"dehub.dev/src/dehub.git/typeobj"
"dehub.dev/src/dehub.git/yamlutil"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
yaml "gopkg.in/yaml.v2"
)
// Payload describes the methods which must be implemented by the different
// payload types. None of the methods should modify the underlying object.
type Payload interface {
// MessageHead returns the head of the commit message (i.e. the first line).
// The PayloadCommon of the outer PayloadUnion is passed in for added
// context, if necessary.
MessageHead(PayloadCommon) string
// Fingerprint returns the raw fingerprint which can be signed when
// accrediting this payload. The ChangedFile objects given describe the file
// changes between the parent commit and this commit.
//
// If this method returns nil it means that the payload has no fingerprint
// in-and-of-itself.
Fingerprint([]ChangedFile) ([]byte, error)
}
// PayloadCommon describes the fields common to all Payloads.
type PayloadCommon struct {
Fingerprint yamlutil.Blob `yaml:"fingerprint"`
Credentials []sigcred.CredentialUnion `yaml:"credentials"`
// LegacyChangeHash is no longer used, use Fingerprint instead.
LegacyChangeHash yamlutil.Blob `yaml:"change_hash,omitempty"`
}
func (cc PayloadCommon) credIDs() []string {
m := map[string]struct{}{}
for _, cred := range cc.Credentials {
if cred.AccountID != "" {
m[cred.AccountID] = struct{}{}
} else if cred.AnonID != "" {
m[cred.AnonID] = struct{}{}
}
}
s := make([]string, 0, len(m))
for id := range m {
s = append(s, id)
}
sort.Strings(s)
return s
}
func abbrevCommitMessage(msg string) string {
i := strings.Index(msg, "\n")
if i > 0 {
msg = msg[:i]
}
if len(msg) > 80 {
msg = msg[:77] + "..."
}
return msg
}
// PayloadUnion represents a single Payload of variable type. Only one field
// should be set on a PayloadUnion, unless otherwise noted.
type PayloadUnion struct {
Change *PayloadChange `type:"change,default"`
Credential *PayloadCredential `type:"credential"`
Comment *PayloadComment `type:"comment"`
// Common may be set in addition to one of the other fields.
Common PayloadCommon `yaml:",inline"`
}
// MarshalYAML implements the yaml.Marshaler interface.
func (p PayloadUnion) MarshalYAML() (interface{}, error) {
return typeobj.MarshalYAML(p)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (p *PayloadUnion) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err := typeobj.UnmarshalYAML(p, unmarshal); err != nil {
return err
} else if len(p.Common.LegacyChangeHash) > 0 {
p.Common.Fingerprint = p.Common.LegacyChangeHash
p.Common.LegacyChangeHash = nil
}
return nil
}
// Payload returns the Payload instance encapsulated by this PayloadUnion.
//
// This will panic if a Payload field is not populated.
func (p PayloadUnion) Payload() Payload {
el, _, err := typeobj.Element(p)
if err != nil {
panic(err)
}
return el.(Payload)
}
// Type returns the Payload's type (as would be used in its YAML "type" field).
//
// This will panic if a Payload field is not populated.
func (p PayloadUnion) Type() string {
_, typeStr, err := typeobj.Element(p)
if err != nil {
panic(err)
}
return typeStr
}
// MarshalText implements the encoding.TextMarshaler interface by returning the
// form the payload in the git commit message.
func (p PayloadUnion) MarshalText() ([]byte, error) {
msgHead := abbrevCommitMessage(p.Payload().MessageHead(p.Common))
msgBodyB, err := yaml.Marshal(p)
if err != nil {
return nil, fmt.Errorf("marshaling payload %+v as yaml: %w", p, err)
}
w := new(bytes.Buffer)
w.WriteString(msgHead)
w.WriteString("\n\n---\n")
w.Write(msgBodyB)
return w.Bytes(), nil
}
// UnmarshalText implements the encoding.TextUnmarshaler interface by decoding a
// payload object which has been encoded into a git commit message.
func (p *PayloadUnion) UnmarshalText(msg []byte) error {
i := bytes.Index(msg, []byte("\n"))
if i < 0 {
return fmt.Errorf("commit message %q is malformed, it has no body", msg)
}
msgBody := msg[i:]
if err := yaml.Unmarshal(msgBody, p); err != nil {
return fmt.Errorf("unmarshaling commit payload from yaml: %w", err)
}
return nil
}
// AccreditPayload returns the given PayloadUnion with an appended Credential
// provided by the given SignifierInterface.
func (proj *Project) AccreditPayload(payUn PayloadUnion, sig sigcred.Signifier) (PayloadUnion, error) {
headFS, err := proj.headFS()
if err != nil {
return payUn, fmt.Errorf("retrieving HEAD fs: %w", err)
}
cred, err := sig.Sign(headFS, payUn.Common.Fingerprint)
if err != nil {
return payUn, fmt.Errorf("signing fingerprint %q: %w", payUn.Common.Fingerprint, err)
}
payUn.Common.Credentials = append(payUn.Common.Credentials, cred)
return payUn, nil
}
// CommitDirectParams are the parameters to the CommitDirect method. All are
// required, unless otherwise noted.
type CommitDirectParams struct {
PayloadUnion PayloadUnion
Author string
ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q)
GitTree *object.Tree
}
// CommitDirect constructs a git commit object and and stores it, returning the
// resulting Commit. This method does not interact with HEAD at all.
func (proj *Project) CommitDirect(params CommitDirectParams) (Commit, error) {
msgB, err := params.PayloadUnion.MarshalText()
if err != nil {
return Commit{}, fmt.Errorf("encoding payload to message string: %w", err)
}
author := object.Signature{
Name: params.Author,
When: time.Now(),
}
commit := &object.Commit{
Author: author,
Committer: author,
Message: string(msgB),
TreeHash: params.GitTree.Hash,
}
if params.ParentHash != plumbing.ZeroHash {
commit.ParentHashes = []plumbing.Hash{params.ParentHash}
}
commitObj := proj.GitRepo.Storer.NewEncodedObject()
if err := commit.Encode(commitObj); err != nil {
return Commit{}, fmt.Errorf("encoding commit object: %w", err)
}
commitHash, err := proj.GitRepo.Storer.SetEncodedObject(commitObj)
if err != nil {
return Commit{}, fmt.Errorf("setting encoded object: %w", err)
}
return proj.GetCommit(commitHash)
}
// Commit uses the given PayloadUnion to create a git commit object and commits
// it to the current HEAD, returning the full Commit.
func (proj *Project) Commit(payUn PayloadUnion) (Commit, error) {
headRef, err := proj.TraverseReferenceChain(plumbing.HEAD, func(ref *plumbing.Reference) bool {
return ref.Type() == plumbing.HashReference
})
if err != nil {
return Commit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err)
}
headRefName := headRef.Name()
headHash, err := proj.ReferenceToHash(headRefName)
if err != nil {
return Commit{}, fmt.Errorf("resolving ref %q (HEAD): %w", headRefName, err)
}
// TODO this is also used in the same way in NewCommitChange. It might make
// sense to refactor this logic out, it might not be needed in fs at all.
_, stagedTree, err := fs.FromStagedChangesTree(proj.GitRepo)
if err != nil {
return Commit{}, fmt.Errorf("getting staged changes: %w", err)
}
commit, err := proj.CommitDirect(CommitDirectParams{
PayloadUnion: payUn,
Author: strings.Join(payUn.Common.credIDs(), ", "),
ParentHash: headHash,
GitTree: stagedTree,
})
if err != nil {
return Commit{}, err
}
// now set the branch to this new commit
newHeadRef := plumbing.NewHashReference(headRefName, commit.Hash)
if err := proj.GitRepo.Storer.SetReference(newHeadRef); err != nil {
return Commit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w",
headRefName, commit.Hash, err)
}
return commit, nil
}
// HasStagedChanges returns true if there are file changes which have been
// staged (e.g. via "git add").
func (proj *Project) HasStagedChanges() (bool, error) {
w, err := proj.GitRepo.Worktree()
if err != nil {
return false, fmt.Errorf("retrieving worktree: %w", err)
}
status, err := w.Status()
if err != nil {
return false, fmt.Errorf("retrieving worktree status: %w", err)
}
var any bool
for _, fileStatus := range status {
if fileStatus.Staging != git.Unmodified &&
fileStatus.Staging != git.Untracked {
any = true
break
}
}
return any, nil
}
// VerifyCommits verifies that the given commits, which are presumably on the
// given branch, are gucci.
func (proj *Project) VerifyCommits(branchName plumbing.ReferenceName, commits []Commit) error {
// this isn't strictly necessary for this method, but it helps discover bugs
// in other parts of the code.
if len(commits) == 0 {
return errors.New("cannot call VerifyCommits with empty commit slice")
}
// First determine the root of the main branch. All commits need to be an
// ancestor of it. If the main branch has not been created yet then there
// might not be a root commit yet.
var rootCommitObj *object.Commit
mainCommit, err := proj.GetCommitByRevision(plumbing.Revision(MainRefName))
if errors.Is(err, plumbing.ErrReferenceNotFound) {
// main branch hasn't been created yet. The commits can only be verified
// if they are for the main branch and they include the root commit.
if branchName != MainRefName {
return fmt.Errorf("cannot verify commits in branch %q when no main branch exists", branchName)
}
for _, commit := range commits {
if commit.Object.NumParents() == 0 {
rootCommitObj = commit.Object
break
}
}
if rootCommitObj == nil {
return errors.New("root commit of main branch cannot be determined")
}
} else if err != nil {
return fmt.Errorf("retrieving commit at HEAD of %q: %w", MainRefName.Short(), err)
} else {
rootCommitObj = mainCommit.Object
for {
if rootCommitObj.NumParents() == 0 {
break
} else if rootCommitObj.NumParents() > 1 {
return fmt.Errorf("commit %q in main branch has more than one parent", rootCommitObj.Hash)
} else if rootCommitObj, err = rootCommitObj.Parent(0); err != nil {
return fmt.Errorf("retrieving parent commit of %q: %w", rootCommitObj.Hash, err)
}
}
}
// We also need the HEAD of the given branch, if it exists.
branchCommit, err := proj.GetCommitByRevision(plumbing.Revision(branchName))
if err != nil && !errors.Is(err, plumbing.ErrReferenceNotFound) {
return fmt.Errorf("retrieving commit at HEAD of %q: %w", branchName.Short(), err)
}
for i, commit := range commits {
// It's not a requirement that the given Commits are in ancestral order,
// but usually they are; if the previous commit is the parent of this
// one we can skip a bunch of work.
var parentTree *object.Tree
var isNonFF bool
if i > 0 && commits[i-1].Hash == commit.Object.ParentHashes[0] {
parentTree = commits[i-1].TreeObject
} else if commit.Hash == rootCommitObj.Hash {
// looking at the root commit, assume it's ok
} else {
var err error
isAncestor := func(older, younger *object.Commit) bool {
var isAncestor bool
if err != nil {
return false
} else if isAncestor, err = older.IsAncestor(younger); err != nil {
err = fmt.Errorf("determining if %q is an ancestor of %q: %w",
younger.Hash, older.Hash, err)
return false
}
return isAncestor
}
ancestorOfRoot := isAncestor(rootCommitObj, commit.Object)
if branchCommit.Hash != plumbing.ZeroHash { // checking if the var was set
// this could only be a nonFF if the branch actually exists.
isNonFF = !isAncestor(branchCommit.Object, commit.Object)
}
if err != nil {
return err
} else if !ancestorOfRoot {
return fmt.Errorf("commit %q must be direct descendant of root commit of %q (%q)",
commit.Hash, MainRefName.Short(), rootCommitObj.Hash,
)
}
}
if err := proj.verifyCommit(branchName, commit, parentTree, isNonFF); err != nil {
return fmt.Errorf("verifying commit %q: %w", commit.Hash, err)
}
}
return nil
}
// parentTree returns the tree of the parent commit of the given commit. If the
// given commit has no parents then a bare tree is returned.
func (proj *Project) parentTree(commitObj *object.Commit) (*object.Tree, error) {
switch commitObj.NumParents() {
case 0:
return new(object.Tree), nil
case 1:
if parentCommitObj, err := commitObj.Parent(0); err != nil {
return nil, fmt.Errorf("getting parent commit %q: %w",
commitObj.ParentHashes[0], err)
} else if parentTree, err := proj.GitRepo.TreeObject(parentCommitObj.TreeHash); err != nil {
return nil, fmt.Errorf("getting parent tree object %q: %w",
parentCommitObj.TreeHash, err)
} else {
return parentTree, nil
}
default:
return nil, errors.New("commit has multiple parents")
}
}
// if parentTree is nil then it will be inferred.
func (proj *Project) verifyCommit(
branchName plumbing.ReferenceName,
commit Commit,
parentTree *object.Tree,
isNonFF bool,
) error {
if parentTree == nil {
var err error
if parentTree, err = proj.parentTree(commit.Object); err != nil {
return fmt.Errorf("retrieving parent tree of commit: %w", err)
}
}
var sigFS fs.FS
if commit.Object.NumParents() == 0 {
sigFS = fs.FromTree(commit.TreeObject)
} else {
sigFS = fs.FromTree(parentTree)
}
cfg, err := proj.loadConfig(sigFS)
if err != nil {
return fmt.Errorf("loading config of parent %q: %w", commit.Object.ParentHashes[0], err)
}
// assert access controls
changedFiles, err := ChangedFilesBetweenTrees(parentTree, commit.TreeObject)
if err != nil {
return fmt.Errorf("calculating diff from tree %q to tree %q: %w",
parentTree.Hash, commit.TreeObject.Hash, err)
} else if len(changedFiles) > 0 && commit.Payload.Change == nil {
return errors.New("files changes but commit is not a change commit")
}
pathsChanged := make([]string, len(changedFiles))
for i := range changedFiles {
pathsChanged[i] = changedFiles[i].Path
}
commitType := commit.Payload.Type()
err = accessctl.AssertCanCommit(cfg.AccessControls, accessctl.CommitRequest{
Type: commitType,
Branch: branchName.Short(),
Credentials: commit.Payload.Common.Credentials,
FilesChanged: pathsChanged,
NonFastForward: isNonFF,
})
if err != nil {
return fmt.Errorf("asserting access controls: %w", err)
}
// ensure the fingerprint is what it's expected to be
storedFingerprint := commit.Payload.Common.Fingerprint
expectedFingerprint, err := commit.Payload.Payload().Fingerprint(changedFiles)
if err != nil {
return fmt.Errorf("calculating expected payload fingerprint: %w", err)
} else if expectedFingerprint == nil {
// the payload doesn't have a fingerprint of its own, it's just carrying
// one, so no point in checking if it's "correct".
} else if !bytes.Equal(storedFingerprint, expectedFingerprint) {
return fmt.Errorf("unexpected fingerprint in payload, is %q but should be %q",
storedFingerprint, yamlutil.Blob(expectedFingerprint))
}
// verify all credentials
for _, cred := range commit.Payload.Common.Credentials {
if cred.AccountID == "" {
if err := cred.SelfVerify(storedFingerprint); err != nil {
return fmt.Errorf("verifying credential %+v: %w", cred, err)
}
} else {
sig, err := proj.signifierForCredential(sigFS, cred)
if err != nil {
return fmt.Errorf("finding signifier for credential %+v: %w", cred, err)
} else if err := sig.Verify(sigFS, storedFingerprint, cred); err != nil {
return fmt.Errorf("verifying credential %+v: %w", cred, err)
}
}
}
return nil
}
// LastChangeDescription iterates over the given commits in reverse order and
// returns the first change description it comes across. A change description
// may come from a change payload or a credential payload which covers a set of
// changes.
//
// This function will return an error if no given commits contain a change
// description.
func LastChangeDescription(commits []Commit) (string, error) {
for i := range commits {
i = len(commits) - 1 - i
payUn := commits[i].Payload
if payUn.Change != nil {
return payUn.Change.Description, nil
} else if payUn.Credential != nil && payUn.Credential.ChangeDescription != "" {
return payUn.Credential.ChangeDescription, nil
}
}
return "", errors.New("no commits in range contain a change description")
}
type changeRangeInfo struct {
changeCommits []Commit
authors map[string]struct{}
startTree, endTree *object.Tree
changeDescription string
}
// changeRangeInfo returns various pieces of information about a range of
// commits' changes.
func (proj *Project) changeRangeInfo(commits []Commit) (changeRangeInfo, error) {
info := changeRangeInfo{
authors: map[string]struct{}{},
}
for _, commit := range commits {
if commit.Payload.Change != nil {
info.changeCommits = append(info.changeCommits, commit)
for _, cred := range commit.Payload.Common.Credentials {
info.authors[cred.AccountID] = struct{}{}
}
}
}
if len(info.changeCommits) == 0 {
return changeRangeInfo{}, errors.New("no change commits found in range")
}
// startTree has to be the tree of the parent of the first commit, which
// isn't included in commits. Determine it the hard way.
var err error
if info.startTree, err = proj.parentTree(commits[0].Object); err != nil {
return changeRangeInfo{}, fmt.Errorf("getting tree of parent of %q: %w",
commits[0].Hash, err)
} else if info.changeDescription, err = LastChangeDescription(commits); err != nil {
return changeRangeInfo{}, err
}
lastChangeCommit := info.changeCommits[len(info.changeCommits)-1]
info.endTree = lastChangeCommit.TreeObject
return info, nil
}
func (info changeRangeInfo) changeFingerprint(descr string) ([]byte, error) {
changedFiles, err := ChangedFilesBetweenTrees(info.startTree, info.endTree)
if err != nil {
return nil, fmt.Errorf("calculating diff of commit trees %q and %q: %w",
info.startTree.Hash, info.endTree.Hash, err)
}
return genChangeFingerprint(nil, descr, changedFiles), nil
}
// VerifyCanSetBranchHEADTo is used to verify that a branch's HEAD can be set to
// the given hash. It verifies any new commits which are being added, and
// handles verifying non-fast-forward commits as well.
//
// If the given hash matches the current HEAD of the branch then this performs
// no further checks and returns nil.
func (proj *Project) VerifyCanSetBranchHEADTo(branchName plumbing.ReferenceName, hash plumbing.Hash) error {
oldCommitRef, err := proj.GitRepo.Reference(branchName, true)
if errors.Is(err, plumbing.ErrReferenceNotFound) {
// if the branch is being created then just pull all of its commits and
// verify them.
// TODO optimize this so that it tries to use the merge-base with main,
// so we're not re-verifying a ton of commits unecessarily
commits, err := proj.GetCommitRange(plumbing.ZeroHash, hash)
if err != nil {
return fmt.Errorf("retrieving %q and all its ancestors: %w", hash, err)
}
return proj.VerifyCommits(branchName, commits)
} else if err != nil {
return fmt.Errorf("resolving branch reference to a hash: %w", err)
} else if oldCommitRef.Hash() == hash {
// if the HEAD is already at the given hash then it must be fine.
return nil
}
oldCommitObj, err := proj.GitRepo.CommitObject(oldCommitRef.Hash())
if err != nil {
return fmt.Errorf("retrieving commit object %q: %w", oldCommitRef.Hash(), err)
}
newCommit, err := proj.GetCommit(hash)
if err != nil {
return fmt.Errorf("retrieving commit %q: %w", hash, err)
}
if isAncestor, err := newCommit.Object.IsAncestor(oldCommitObj); err != nil {
return fmt.Errorf("determining if %q is an ancestor of %q: %w",
newCommit.Hash, oldCommitObj.Hash, err)
} else if isAncestor {
// if the new commit is an ancestor of the old one then the branch is
// being force-pushed to a previous commit. This is weird to handle
// using VerifyCommits, so just call verifyCommit directly.
return proj.verifyCommit(branchName, newCommit, nil, true)
}
mbCommits, err := oldCommitObj.MergeBase(newCommit.Object)
if err != nil {
return fmt.Errorf("determining merge-base between %q and %q: %w",
oldCommitObj.Hash, newCommit.Hash, err)
} else if len(mbCommits) == 0 {
return fmt.Errorf("%q and %q have no ancestors in common",
oldCommitObj.Hash, newCommit.Hash)
} else if len(mbCommits) == 2 {
return fmt.Errorf("%q and %q have more than one ancestor in common",
oldCommitObj.Hash, newCommit.Hash)
}
commits, err := proj.GetCommitRange(mbCommits[0].Hash, hash)
if err != nil {
return fmt.Errorf("retrieving commits %q to %q: %w", mbCommits[0].Hash, hash, err)
}
return proj.VerifyCommits(branchName, commits)
}