A read-only clone of the dehub project, kept available until dehub.dev can be brought back online.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
dehub/payload.go

599 lines
19 KiB

Completely refactor naming of everything, in light of new SPEC --- type: change description: |- Completely refactor naming of everything, in light of new SPEC Writing the SPEC shed some light on just how weakly a lot of concepts, like "commit", had been defined, and prompted the delineation of a lot of things along specific lines (commit vs payload, repo vs project). This commit makes the code reflect the SPEC much better in quite a few ways: * Repo is now Project * Commit is now Payload * GitCommit is now just Commit * Hash is now Fingerprint * A lot of minor fields got renamed * All the XXXInterface types are now just XXX, and their old XXX type is now XXXUnion. More than likely there's still some comments and variable names that have slipped passed, but overall I feel like I got most of the changes. fingerprint: AKkDC5BKhKbfXzZQ/F4KquHeMgVvcNxgLmkZFz/nP/tY credentials: - type: pgp_signature pub_key_id: 95C46FA6A41148AC body: iQIzBAABAgAdFiEEJ6tQKp6olvZKJ0lwlcRvpqQRSKwFAl6l7aYACgkQlcRvpqQRSKxFrA//VQ+f8B6pwGS3ORB4VVBnHvvJTGZvAYTvB0fHuHJx2EreR4FwjhaNakk5ClkwbO7WFMq++2OV4xIkvzwswLdbXZF0IHx3wScQM59v4vIkR4V9Lj5p1aGGhQna52uIKugF2gTqKdU4tqYzmBjDND/c2XDwCN5CwTwwnAHXUSSsHxviiPUYPWV5wzFP7uyRW0ZeK8Isv7QECKRXlsDjcSJa+g+jc091FG/jG9Dkai8fbDbW8YXj7W3ALaXgXWEBJMrgQxZcJJRjgCvLY72FIIrUBquu3FepiyzMtZ0yaIvi4NmGCsYqIv00NcMvMtD7iwhOCZn10Sku4wvaKJ8YBMRduhqC99fnr/ZDW0/HvTNcL7GKx11GjwtmzkJgwsHFPy3zX+kMdF4m3WgtoeI0GwEsBXXZE2C49yAk3Mb/3puegl3a1PPMvOabTzo7Xm6xpWkI6gISChI7My71H3EuKZWhkb+IubPmMvJJXIdVxHnsHPz2dl/BZXLgpfVdEgQa2qWeXtYI4NNm37pLl3gv92V4kka+Kr4gfdoq8mJ7aqvc9was35baJbHg4+fEVJG2Wj+2AQU+ncx3nAFzgYyMxwo9K8VuC4QdfRF4ImyxTnWkuokEn9H6JRrbkBDKIELj6vzdPmsjOUEQ4nsYX66/zSibFD7UvhQmdXFs8Gp8/Qq6g4M= account: mediocregopher
4 years ago
package dehub
import (
"bytes"
"errors"
"fmt"
"sort"
"strings"
"time"
"dehub.dev/src/dehub.git/accessctl"
"dehub.dev/src/dehub.git/fs"
"dehub.dev/src/dehub.git/sigcred"
"dehub.dev/src/dehub.git/typeobj"
"dehub.dev/src/dehub.git/yamlutil"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
yaml "gopkg.in/yaml.v2"
)
// Payload describes the methods which must be implemented by the different
// payload types (the values held by the pointer fields of PayloadUnion). None
// of the methods should modify the underlying object.
type Payload interface {
// MessageHead returns the head of the commit message (i.e. the first line).
// The PayloadCommon of the outer PayloadUnion is passed in for added
// context, if necessary.
//
// PayloadUnion.MarshalText places the returned string at the very start of
// the encoded commit message, ahead of the YAML-encoded payload.
MessageHead(PayloadCommon) (string, error)
// Fingerprint returns the raw fingerprint which can be signed when
// accrediting this payload. The ChangedFile objects given describe the file
// changes between the parent commit and this commit.
//
// If this method returns nil it means that the payload has no fingerprint
// in-and-of-itself. When verifying a commit, a nil fingerprint means the
// fingerprint stored in the PayloadCommon is not compared against anything.
Fingerprint([]ChangedFile) ([]byte, error)
}
// PayloadCommon describes the fields common to all Payloads. It is embedded
// inline into the YAML form of a PayloadUnion.
type PayloadCommon struct {
// Fingerprint is the payload's fingerprint. It is the value handed to a
// Signifier when accrediting the payload, and the value every credential
// is verified against when a commit is verified.
Fingerprint yamlutil.Blob `yaml:"fingerprint"`
// Credentials are the credentials which have been attached to the payload.
// AccreditPayload appends to this slice.
Credentials []sigcred.CredentialUnion `yaml:"credentials"`
// LegacyChangeHash is no longer used, use Fingerprint instead. It only
// exists so that payloads encoded with the old "change_hash" field can
// still be decoded: PayloadUnion.UnmarshalYAML moves a non-empty value into
// Fingerprint and clears this field.
//
// Deprecated: use Fingerprint instead.
LegacyChangeHash yamlutil.Blob `yaml:"change_hash,omitempty"`
}
// credIDs returns the sorted, de-duplicated identifiers of the credentials
// attached to the payload. A credential contributes its AccountID when it has
// one, otherwise its AnonID; a credential with neither is skipped. The
// returned slice is never nil.
func (cc PayloadCommon) credIDs() []string {
	seen := make(map[string]struct{})
	for i := range cc.Credentials {
		id := cc.Credentials[i].AccountID
		if id == "" {
			id = cc.Credentials[i].AnonID
		}
		if id == "" {
			continue
		}
		seen[id] = struct{}{}
	}

	ids := make([]string, 0, len(seen))
	for id := range seen {
		ids = append(ids, id)
	}
	sort.Strings(ids)
	return ids
}
// abbrevCommitMessage reduces msg to a single short line, suitable for use as
// the head of a commit message.
//
// Only the first line of msg is kept: everything from the first "\n" onwards
// is dropped, so a message which starts with a newline abbreviates to "". If
// the remaining line is longer than 80 bytes it is cut down to at most 77
// bytes and "..." is appended. The cut is moved back to the nearest rune
// boundary, so a multi-byte UTF-8 character is never split in half.
func abbrevCommitMessage(msg string) string {
	if i := strings.Index(msg, "\n"); i >= 0 {
		msg = msg[:i]
	}

	const maxLen, ellipsis = 80, "..."
	if len(msg) <= maxLen {
		return msg
	}

	// Ranging over a string yields the byte offset at which each rune starts,
	// so cut ends up being the last rune boundary at or before the limit. For
	// plain ASCII that is exactly maxLen-len(ellipsis).
	cut := 0
	for i := range msg {
		if i > maxLen-len(ellipsis) {
			break
		}
		cut = i
	}
	return msg[:cut] + ellipsis
}
// PayloadUnion represents a single Payload of variable type. Only one field
// should be set on a PayloadUnion, unless otherwise noted.
//
// YAML encoding and decoding are delegated to the typeobj package (see
// MarshalYAML and UnmarshalYAML), which reads the `type` struct tags below.
type PayloadUnion struct {
// Change is set when the payload is a change payload.
// NOTE(review): the "default" tag option presumably makes this the type
// assumed when an encoded payload carries no type — confirm against typeobj.
Change *PayloadChange `type:"change,default"`
// Credential is set when the payload is a credential payload.
Credential *PayloadCredential `type:"credential"`
// Comment is set when the payload is a comment payload.
Comment *PayloadComment `type:"comment"`
// Common may be set in addition to one of the other fields. Its fields are
// inlined into the YAML form of the PayloadUnion rather than nested.
Common PayloadCommon `yaml:",inline"`
}
// MarshalYAML implements the yaml.Marshaler interface. The actual encoding is
// handed off to the typeobj package.
func (p PayloadUnion) MarshalYAML() (interface{}, error) {
	encoded, err := typeobj.MarshalYAML(p)
	return encoded, err
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
//
// Decoding itself is handed off to the typeobj package. Afterwards, a payload
// which was encoded with the legacy "change_hash" field is migrated: the
// legacy value is moved into Common.Fingerprint and the legacy field cleared.
func (p *PayloadUnion) UnmarshalYAML(unmarshal func(interface{}) error) error {
	err := typeobj.UnmarshalYAML(p, unmarshal)
	if err != nil {
		return err
	}

	if legacy := p.Common.LegacyChangeHash; len(legacy) > 0 {
		p.Common.Fingerprint = legacy
		p.Common.LegacyChangeHash = nil
	}
	return nil
}
// Payload returns the Payload instance encapsulated by this PayloadUnion, as
// determined by typeobj.Element.
//
// This will panic if a Payload field is not populated.
func (p PayloadUnion) Payload() Payload {
	element, _, err := typeobj.Element(p)
	if err == nil {
		return element.(Payload)
	}
	panic(err)
}
// Type returns the Payload's type (as would be used in its YAML "type" field),
// as determined by typeobj.Element.
//
// This will panic if a Payload field is not populated.
func (p PayloadUnion) Type() string {
	_, typeName, err := typeobj.Element(p)
	if err == nil {
		return typeName
	}
	panic(err)
}
// MarshalText implements the encoding.TextMarshaler interface by returning the
// form the payload takes within a git commit message: the payload's message
// head, a blank line, a "---" line, and then the payload encoded as YAML.
func (p PayloadUnion) MarshalText() ([]byte, error) {
	head, err := p.Payload().MessageHead(p.Common)
	if err != nil {
		return nil, fmt.Errorf("constructing message head: %w", err)
	}

	body, err := yaml.Marshal(p)
	if err != nil {
		return nil, fmt.Errorf("marshaling payload %+v as yaml: %w", p, err)
	}

	const separator = "\n\n---\n"
	msg := make([]byte, 0, len(head)+len(separator)+len(body))
	msg = append(msg, head...)
	msg = append(msg, separator...)
	msg = append(msg, body...)
	return msg, nil
}
// UnmarshalText implements the encoding.TextUnmarshaler interface by decoding a
// payload object which has been encoded into a git commit message.
//
// The first line of the message (its head) is skipped; everything from the
// first newline onwards is decoded as YAML. A message without any newline is
// rejected as malformed.
func (p *PayloadUnion) UnmarshalText(msg []byte) error {
	newline := bytes.IndexByte(msg, '\n')
	if newline < 0 {
		return fmt.Errorf("commit message %q is malformed, it has no body", msg)
	}

	err := yaml.Unmarshal(msg[newline:], p)
	if err != nil {
		return fmt.Errorf("unmarshaling commit payload from yaml: %w", err)
	}
	return nil
}
// AccreditPayload returns the given PayloadUnion with an appended Credential
// provided by the given Signifier. The credential is produced by signing the
// payload's existing Common.Fingerprint against the filesystem at HEAD.
//
// If an error is returned, the returned PayloadUnion is the one which was
// passed in.
func (proj *Project) AccreditPayload(payUn PayloadUnion, sig sigcred.Signifier) (PayloadUnion, error) {
	headFS, err := proj.headFS()
	if err != nil {
		return payUn, fmt.Errorf("retrieving HEAD fs: %w", err)
	}

	fingerprint := payUn.Common.Fingerprint
	cred, err := sig.Sign(headFS, fingerprint)
	if err != nil {
		return payUn, fmt.Errorf("signing fingerprint %q: %w", fingerprint, err)
	}

	payUn.Common.Credentials = append(payUn.Common.Credentials, cred)
	return payUn, nil
}
// CommitDirectParams are the parameters to the CommitDirect method. All are
// required, unless otherwise noted.
type CommitDirectParams struct {
// PayloadUnion is encoded (via its MarshalText method) to form the message
// of the git commit.
PayloadUnion PayloadUnion
// Author is used as the name of both the author and the committer of the
// git commit.
Author string
// ParentHash is the hash of the single parent of the git commit.
ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q)
// GitTree is the tree whose hash is recorded as the tree of the git commit.
GitTree *object.Tree
}
// CommitDirect constructs a git commit object and stores it, returning the
// resulting Commit. This method does not interact with HEAD at all.
//
// The commit's message is the text encoding of params.PayloadUnion, its author
// and committer are both params.Author stamped with the current time, and its
// tree is params.GitTree. The commit has params.ParentHash as its only parent,
// or no parents at all if that hash is zero.
func (proj *Project) CommitDirect(params CommitDirectParams) (Commit, error) {
	message, err := params.PayloadUnion.MarshalText()
	if err != nil {
		return Commit{}, fmt.Errorf("encoding payload to message string: %w", err)
	}

	signature := object.Signature{Name: params.Author, When: time.Now()}

	var parents []plumbing.Hash
	if params.ParentHash != plumbing.ZeroHash {
		parents = append(parents, params.ParentHash)
	}

	gitCommit := &object.Commit{
		Author:       signature,
		Committer:    signature,
		Message:      string(message),
		TreeHash:     params.GitTree.Hash,
		ParentHashes: parents,
	}

	storer := proj.GitRepo.Storer
	encoded := storer.NewEncodedObject()
	if err := gitCommit.Encode(encoded); err != nil {
		return Commit{}, fmt.Errorf("encoding commit object: %w", err)
	}

	hash, err := storer.SetEncodedObject(encoded)
	if err != nil {
		return Commit{}, fmt.Errorf("setting encoded object: %w", err)
	}

	return proj.GetCommit(hash)
}
// Commit uses the given PayloadUnion to create a git commit object and commits
// it to the current HEAD, returning the full Commit.
//
// HEAD is followed until a hash reference is found; the hash of that reference
// becomes the parent of the new commit and the reference is then pointed at
// the new commit. The commit's tree is taken from the currently staged
// changes, and its author is the comma separated list of the payload's
// credential IDs.
func (proj *Project) Commit(payUn PayloadUnion) (Commit, error) {
	isHashRef := func(ref *plumbing.Reference) bool {
		return ref.Type() == plumbing.HashReference
	}
	headRef, err := proj.TraverseReferenceChain(plumbing.HEAD, isHashRef)
	if err != nil {
		return Commit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err)
	}

	refName := headRef.Name()
	parentHash, err := proj.ReferenceToHash(refName)
	if err != nil {
		return Commit{}, fmt.Errorf("resolving ref %q (HEAD): %w", refName, err)
	}

	// TODO this is also used in the same way in NewCommitChange. It might make
	// sense to refactor this logic out, it might not be needed in fs at all.
	_, stagedTree, err := fs.FromStagedChangesTree(proj.GitRepo)
	if err != nil {
		return Commit{}, fmt.Errorf("getting staged changes: %w", err)
	}

	params := CommitDirectParams{
		PayloadUnion: payUn,
		Author:       strings.Join(payUn.Common.credIDs(), ", "),
		ParentHash:   parentHash,
		GitTree:      stagedTree,
	}
	commit, err := proj.CommitDirect(params)
	if err != nil {
		return Commit{}, err
	}

	// the commit exists now, all that's left is to move the branch onto it
	updatedRef := plumbing.NewHashReference(refName, commit.Hash)
	err = proj.GitRepo.Storer.SetReference(updatedRef)
	if err != nil {
		return Commit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w",
			refName, commit.Hash, err)
	}
	return commit, nil
}
// HasStagedChanges returns true if there are file changes which have been
// staged (e.g. via "git add"). A file counts as staged when its staging status
// in the worktree is anything other than unmodified or untracked.
func (proj *Project) HasStagedChanges() (bool, error) {
	worktree, err := proj.GitRepo.Worktree()
	if err != nil {
		return false, fmt.Errorf("retrieving worktree: %w", err)
	}

	status, err := worktree.Status()
	if err != nil {
		return false, fmt.Errorf("retrieving worktree status: %w", err)
	}

	for _, fileStatus := range status {
		switch fileStatus.Staging {
		case git.Unmodified, git.Untracked:
			// nothing staged for this file, keep looking
		default:
			return true, nil
		}
	}
	return false, nil
}
// VerifyCommits verifies that the given commits, which are presumably on the
// given branch, are all valid.
//
// Every commit must either be the root commit of the main branch or descend
// from it, and must pass verifyCommit (access controls, fingerprint and
// credentials). If the main branch doesn't exist yet then only commits for the
// main branch can be verified, and they must include a commit without parents
// to act as the root.
//
// An error is returned if commits is empty, or for the first commit which
// fails verification.
func (proj *Project) VerifyCommits(branchName plumbing.ReferenceName, commits []Commit) error {
	// this isn't strictly necessary for this method, but it helps discover bugs
	// in other parts of the code.
	if len(commits) == 0 {
		return errors.New("cannot call VerifyCommits with empty commit slice")
	}

	// First determine the root of the main branch. All commits need to be an
	// ancestor of it. If the main branch has not been created yet then there
	// might not be a root commit yet.
	var rootCommitObj *object.Commit
	mainCommit, err := proj.GetCommitByRevision(plumbing.Revision(MainRefName))
	switch {
	case errors.Is(err, plumbing.ErrReferenceNotFound):
		// main branch hasn't been created yet. The commits can only be verified
		// if they are for the main branch and they include the root commit.
		if branchName != MainRefName {
			return fmt.Errorf("cannot verify commits in branch %q when no main branch exists", branchName)
		}
		for _, commit := range commits {
			if commit.Object.NumParents() == 0 {
				rootCommitObj = commit.Object
				break
			}
		}
		if rootCommitObj == nil {
			return errors.New("root commit of main branch cannot be determined")
		}

	case err != nil:
		return fmt.Errorf("retrieving commit at HEAD of %q: %w", MainRefName.Short(), err)

	default:
		// walk back from the HEAD of main until a commit without parents is
		// found.
		rootCommitObj = mainCommit.Object
		for rootCommitObj.NumParents() > 0 {
			if rootCommitObj.NumParents() > 1 {
				return fmt.Errorf("commit %q in main branch has more than one parent", rootCommitObj.Hash)
			}
			// The parent is kept in its own variable until it's known to be
			// good: on failure it is nil, and the error needs the hash of the
			// commit whose parent couldn't be retrieved.
			parentObj, err := rootCommitObj.Parent(0)
			if err != nil {
				return fmt.Errorf("retrieving parent commit of %q: %w", rootCommitObj.Hash, err)
			}
			rootCommitObj = parentObj
		}
	}

	// We also need the HEAD of the given branch, if it exists.
	branchCommit, err := proj.GetCommitByRevision(plumbing.Revision(branchName))
	if err != nil && !errors.Is(err, plumbing.ErrReferenceNotFound) {
		return fmt.Errorf("retrieving commit at HEAD of %q: %w", branchName.Short(), err)
	}

	for i, commit := range commits {
		// It's not a requirement that the given Commits are in ancestral order,
		// but usually they are; if the previous commit is the parent of this
		// one we can skip a bunch of work.
		var parentTree *object.Tree
		var isNonFF bool

		switch {
		// a commit without parents (e.g. the root commit showing up out of
		// order) has no ParentHashes[0] to compare against.
		case i > 0 && commit.Object.NumParents() > 0 &&
			commits[i-1].Hash == commit.Object.ParentHashes[0]:
			parentTree = commits[i-1].TreeObject

		case commit.Hash == rootCommitObj.Hash:
			// looking at the root commit, assume it's ok

		default:
			// ancestryErr holds the first error hit by isAncestor; once it's
			// set all further calls are no-ops, so it only needs checking once.
			var ancestryErr error
			isAncestor := func(older, younger *object.Commit) bool {
				if ancestryErr != nil {
					return false
				}
				ok, err := older.IsAncestor(younger)
				if err != nil {
					ancestryErr = fmt.Errorf("determining if %q is an ancestor of %q: %w",
						older.Hash, younger.Hash, err)
					return false
				}
				return ok
			}

			descendsFromRoot := isAncestor(rootCommitObj, commit.Object)
			if branchCommit.Hash != plumbing.ZeroHash { // checking if the var was set
				// this could only be a nonFF if the branch actually exists.
				isNonFF = !isAncestor(branchCommit.Object, commit.Object)
			}

			if ancestryErr != nil {
				return ancestryErr
			} else if !descendsFromRoot {
				return fmt.Errorf("commit %q must be direct descendant of root commit of %q (%q)",
					commit.Hash, MainRefName.Short(), rootCommitObj.Hash,
				)
			}
		}

		if err := proj.verifyCommit(branchName, commit, parentTree, isNonFF); err != nil {
			return fmt.Errorf("verifying commit %q: %w", commit.Hash, err)
		}
	}
	return nil
}
// parentTree returns the tree of the parent commit of the given commit. If the
// given commit has no parents then a bare tree is returned. A commit with more
// than one parent is rejected with an error.
func (proj *Project) parentTree(commitObj *object.Commit) (*object.Tree, error) {
	numParents := commitObj.NumParents()
	if numParents == 0 {
		return &object.Tree{}, nil
	}
	if numParents != 1 {
		return nil, errors.New("commit has multiple parents")
	}

	parentObj, err := commitObj.Parent(0)
	if err != nil {
		return nil, fmt.Errorf("getting parent commit %q: %w",
			commitObj.ParentHashes[0], err)
	}

	tree, err := proj.GitRepo.TreeObject(parentObj.TreeHash)
	if err != nil {
		return nil, fmt.Errorf("getting parent tree object %q: %w",
			parentObj.TreeHash, err)
	}
	return tree, nil
}
// verifyCommit verifies a single commit which is being added to the given
// branch. It checks, in order, that:
//
//   - files are only changed by change commits,
//   - the access controls allow the commit (given its type, branch,
//     credentials, changed files and whether it's a non-fast-forward),
//   - the fingerprint stored in the payload matches the one the payload
//     computes for itself (when the payload has one of its own),
//   - every credential verifies against the stored fingerprint.
//
// The config and signifiers used are those of the parent commit's tree, or of
// the commit's own tree if it has no parents.
//
// if parentTree is nil then it will be inferred.
func (proj *Project) verifyCommit(
	branchName plumbing.ReferenceName,
	commit Commit,
	parentTree *object.Tree,
	isNonFF bool,
) error {
	numParents := commit.Object.NumParents()

	// Only look the parent tree up when the caller didn't provide it. Commits
	// with more than one parent always go through proj.parentTree, since that
	// is what rejects them.
	if parentTree == nil || numParents > 1 {
		var err error
		if parentTree, err = proj.parentTree(commit.Object); err != nil {
			return fmt.Errorf("retrieving parent tree of commit: %w", err)
		}
	}

	var sigFS fs.FS
	if numParents == 0 {
		sigFS = fs.FromTree(commit.TreeObject)
	} else {
		sigFS = fs.FromTree(parentTree)
	}

	cfg, err := proj.loadConfig(sigFS)
	if err != nil {
		// a commit without parents had its config loaded from its own tree,
		// and has no parent hash to report.
		if numParents == 0 {
			return fmt.Errorf("loading config of root commit %q: %w", commit.Hash, err)
		}
		return fmt.Errorf("loading config of parent %q: %w", commit.Object.ParentHashes[0], err)
	}

	// assert access controls
	changedFiles, err := ChangedFilesBetweenTrees(parentTree, commit.TreeObject)
	if err != nil {
		return fmt.Errorf("calculating diff from tree %q to tree %q: %w",
			parentTree.Hash, commit.TreeObject.Hash, err)
	} else if len(changedFiles) > 0 && commit.Payload.Change == nil {
		return errors.New("files changes but commit is not a change commit")
	}

	pathsChanged := make([]string, len(changedFiles))
	for i := range changedFiles {
		pathsChanged[i] = changedFiles[i].Path
	}

	commitType := commit.Payload.Type()
	err = accessctl.AssertCanCommit(cfg.AccessControls, accessctl.CommitRequest{
		Type:           commitType,
		Branch:         branchName.Short(),
		Credentials:    commit.Payload.Common.Credentials,
		FilesChanged:   pathsChanged,
		NonFastForward: isNonFF,
	})
	if err != nil {
		return fmt.Errorf("asserting access controls: %w", err)
	}

	// ensure the fingerprint is what it's expected to be
	storedFingerprint := commit.Payload.Common.Fingerprint
	expectedFingerprint, err := commit.Payload.Payload().Fingerprint(changedFiles)
	if err != nil {
		return fmt.Errorf("calculating expected payload fingerprint: %w", err)
	} else if expectedFingerprint == nil {
		// the payload doesn't have a fingerprint of its own, it's just carrying
		// one, so no point in checking if it's "correct".
	} else if !bytes.Equal(storedFingerprint, expectedFingerprint) {
		return fmt.Errorf("unexpected fingerprint in payload, is %q but should be %q",
			storedFingerprint, yamlutil.Blob(expectedFingerprint))
	}

	// verify all credentials. Credentials without an account verify themselves,
	// all others are verified by the signifier found for them via sigFS.
	for _, cred := range commit.Payload.Common.Credentials {
		if cred.AccountID == "" {
			if err := cred.SelfVerify(storedFingerprint); err != nil {
				return fmt.Errorf("verifying credential %+v: %w", cred, err)
			}
			continue
		}

		sig, err := proj.signifierForCredential(sigFS, cred)
		if err != nil {
			return fmt.Errorf("finding signifier for credential %+v: %w", cred, err)
		} else if err := sig.Verify(sigFS, storedFingerprint, cred); err != nil {
			return fmt.Errorf("verifying credential %+v: %w", cred, err)
		}
	}

	return nil
}
// changeRangeInfo holds what Project.changeRangeInfo gathers about the change
// commits found within a range of commits.
type changeRangeInfo struct {
// changeCommits are the commits of the range whose payload is a change, in
// the order they appeared in the range.
changeCommits []Commit
// authors is the set of AccountIDs of the credentials found on the change
// commits.
authors map[string]struct{}
// msg is the description of the last change commit.
msg string
// startTree is the tree of the parent of the first commit of the range,
// endTree is the tree of the last change commit.
startTree, endTree *object.Tree
// changeFingerprint is the change fingerprint generated from msg and the
// files which changed between startTree and endTree.
changeFingerprint []byte
}
// changeRangeInfo returns various pieces of information about a range of
// commits' changes: which of the commits are change commits, the AccountIDs
// of the credentials on them, the description of the last one, the trees the
// range starts and ends at, and the change fingerprint of the range as a
// whole.
//
// An error is returned if the range contains no change commits.
func (proj *Project) changeRangeInfo(commits []Commit) (changeRangeInfo, error) {
	info := changeRangeInfo{authors: make(map[string]struct{})}
	for _, commit := range commits {
		if commit.Payload.Change == nil {
			continue
		}
		info.changeCommits = append(info.changeCommits, commit)
		for _, cred := range commit.Payload.Common.Credentials {
			info.authors[cred.AccountID] = struct{}{}
		}
	}

	numChanges := len(info.changeCommits)
	if numChanges == 0 {
		return changeRangeInfo{}, errors.New("no change commits found in range")
	}

	// startTree has to be the tree of the parent of the first commit, which
	// isn't included in commits. Determine it the hard way.
	startTree, err := proj.parentTree(commits[0].Object)
	if err != nil {
		return changeRangeInfo{}, fmt.Errorf("getting tree of parent of %q: %w",
			commits[0].Hash, err)
	}
	info.startTree = startTree

	// the last change commit supplies both the message and the end tree.
	lastChange := info.changeCommits[numChanges-1]
	info.msg = lastChange.Payload.Change.Description
	info.endTree = lastChange.TreeObject

	changedFiles, err := ChangedFilesBetweenTrees(info.startTree, info.endTree)
	if err != nil {
		return changeRangeInfo{}, fmt.Errorf("calculating diff of commit trees %q and %q: %w",
			info.startTree.Hash, info.endTree.Hash, err)
	}

	info.changeFingerprint = genChangeFingerprint(nil, info.msg, changedFiles)
	return info, nil
}
// VerifyCanSetBranchHEADTo is used to verify that a branch's HEAD can be set to
// the given hash. It verifies any new commits which are being added, and
// handles verifying non-fast-forward commits as well.
//
// If the branch doesn't exist yet then the commit at the given hash and all of
// its ancestors are verified. Otherwise the commits verified are those between
// the merge-base of the branch's current HEAD and the given hash, and the
// given hash itself; it is an error for there to be no merge-base, or more
// than one.
//
// If the given hash matches the current HEAD of the branch then this performs
// no further checks and returns nil.
func (proj *Project) VerifyCanSetBranchHEADTo(branchName plumbing.ReferenceName, hash plumbing.Hash) error {
	oldCommitRef, err := proj.GitRepo.Reference(branchName, true)
	switch {
	case errors.Is(err, plumbing.ErrReferenceNotFound):
		// if the branch is being created then just pull all of its commits and
		// verify them.
		// TODO optimize this so that it tries to use the merge-base with main,
		// so we're not re-verifying a ton of commits unnecessarily
		commits, err := proj.GetCommitRange(plumbing.ZeroHash, hash)
		if err != nil {
			return fmt.Errorf("retrieving %q and all its ancestors: %w", hash, err)
		}
		return proj.VerifyCommits(branchName, commits)

	case err != nil:
		return fmt.Errorf("resolving branch reference to a hash: %w", err)

	case oldCommitRef.Hash() == hash:
		// if the HEAD is already at the given hash then it must be fine.
		return nil
	}

	oldCommitObj, err := proj.GitRepo.CommitObject(oldCommitRef.Hash())
	if err != nil {
		return fmt.Errorf("retrieving commit object %q: %w", oldCommitRef.Hash(), err)
	}

	newCommitObj, err := proj.GitRepo.CommitObject(hash)
	if err != nil {
		return fmt.Errorf("retrieving commit object %q: %w", hash, err)
	}

	mbCommits, err := oldCommitObj.MergeBase(newCommitObj)
	switch {
	case err != nil:
		return fmt.Errorf("determining merge-base between %q and %q: %w",
			oldCommitObj.Hash, newCommitObj.Hash, err)
	case len(mbCommits) == 0:
		return fmt.Errorf("%q and %q have no ancestors in common",
			oldCommitObj.Hash, newCommitObj.Hash)
	case len(mbCommits) > 1:
		// any number of merge-bases beyond one is ambiguous, not just two.
		return fmt.Errorf("%q and %q have more than one ancestor in common",
			oldCommitObj.Hash, newCommitObj.Hash)
	}

	mergeBase := mbCommits[0].Hash
	commits, err := proj.GetCommitRange(mergeBase, hash)
	if err != nil {
		return fmt.Errorf("retrieving commits %q to %q: %w", mergeBase, hash, err)
	}
	return proj.VerifyCommits(branchName, commits)
}