package dehub

import (
	"bytes"
	"errors"
	"fmt"
	"sort"
	"strings"
	"time"

	"dehub.dev/src/dehub.git/accessctl"
	"dehub.dev/src/dehub.git/fs"
	"dehub.dev/src/dehub.git/sigcred"
	"dehub.dev/src/dehub.git/typeobj"
	"dehub.dev/src/dehub.git/yamlutil"

	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	yaml "gopkg.in/yaml.v2"
)

// Payload describes the methods which must be implemented by the different
// payload types. None of the methods should modify the underlying object.
type Payload interface {
	// MessageHead returns the head of the commit message (i.e. the first line).
	// The PayloadCommon of the outer PayloadUnion is passed in for added
	// context, if necessary.
	MessageHead(PayloadCommon) string

	// Fingerprint returns the raw fingerprint which can be signed when
	// accrediting this payload. The ChangedFile objects given describe the file
	// changes between the parent commit and this commit.
	//
	// If this method returns nil it means that the payload has no fingerprint
	// in-and-of-itself.
	Fingerprint([]ChangedFile) ([]byte, error)
}

// PayloadCommon describes the fields common to all Payloads.
type PayloadCommon struct {
	Fingerprint yamlutil.Blob             `yaml:"fingerprint"`
	Credentials []sigcred.CredentialUnion `yaml:"credentials"`

	// LegacyChangeHash is no longer used, use Fingerprint instead.
	LegacyChangeHash yamlutil.Blob `yaml:"change_hash,omitempty"`
}

// credIDs returns the sorted, de-duplicated identifiers of the credentials in
// cc. For each credential the AccountID is used if it is set, otherwise the
// AnonID if that is set; credentials with neither are skipped.
func (cc PayloadCommon) credIDs() []string {
	m := map[string]struct{}{}
	for _, cred := range cc.Credentials {
		if cred.AccountID != "" {
			m[cred.AccountID] = struct{}{}
		} else if cred.AnonID != "" {
			m[cred.AnonID] = struct{}{}
		}
	}
	s := make([]string, 0, len(m))
	for id := range m {
		s = append(s, id)
	}
	sort.Strings(s)
	return s
}

// abbrevCommitMessage reduces msg to a single line suitable for use as the
// head of a commit message. Everything from the first newline onward is
// dropped, and if what remains is longer than 80 bytes it is cut down and
// suffixed with "..." so that the result is at most 80 bytes.
func abbrevCommitMessage(msg string) string {
	// The check is i >= 0 (not i > 0) so that a message which begins with a
	// newline is cut as well. Otherwise newlines would remain in the head, and
	// UnmarshalText, which treats everything after the first newline as the
	// YAML body, would not be able to decode the resulting commit message.
	if i := strings.Index(msg, "\n"); i >= 0 {
		msg = msg[:i]
	}
	if len(msg) > 80 {
		// Cut at the last rune boundary at or before byte 77 so that a
		// multi-byte UTF-8 sequence is never split in half. Ranging over a
		// string yields the starting byte index of each rune; for pure ASCII
		// input the cut lands on exactly byte 77.
		cut := 0
		for i := range msg {
			if i > 77 {
				break
			}
			cut = i
		}
		msg = msg[:cut] + "..."
	}
	return msg
}

// PayloadUnion represents a single Payload of variable type.
Only one field // should be set on a PayloadUnion, unless otherwise noted. type PayloadUnion struct { Change *PayloadChange `type:"change,default"` Credential *PayloadCredential `type:"credential"` Comment *PayloadComment `type:"comment"` // Common may be set in addition to one of the other fields. Common PayloadCommon `yaml:",inline"` } // MarshalYAML implements the yaml.Marshaler interface. func (p PayloadUnion) MarshalYAML() (interface{}, error) { return typeobj.MarshalYAML(p) } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (p *PayloadUnion) UnmarshalYAML(unmarshal func(interface{}) error) error { if err := typeobj.UnmarshalYAML(p, unmarshal); err != nil { return err } else if len(p.Common.LegacyChangeHash) > 0 { p.Common.Fingerprint = p.Common.LegacyChangeHash p.Common.LegacyChangeHash = nil } return nil } // Payload returns the Payload instance encapsulated by this PayloadUnion. // // This will panic if a Payload field is not populated. func (p PayloadUnion) Payload() Payload { el, _, err := typeobj.Element(p) if err != nil { panic(err) } return el.(Payload) } // Type returns the Payload's type (as would be used in its YAML "type" field). // // This will panic if a Payload field is not populated. func (p PayloadUnion) Type() string { _, typeStr, err := typeobj.Element(p) if err != nil { panic(err) } return typeStr } // MarshalText implements the encoding.TextMarshaler interface by returning the // form the payload in the git commit message. func (p PayloadUnion) MarshalText() ([]byte, error) { msgHead := abbrevCommitMessage(p.Payload().MessageHead(p.Common)) msgBodyB, err := yaml.Marshal(p) if err != nil { return nil, fmt.Errorf("marshaling payload %+v as yaml: %w", p, err) } w := new(bytes.Buffer) w.WriteString(msgHead) w.WriteString("\n\n---\n") w.Write(msgBodyB) return w.Bytes(), nil } // UnmarshalText implements the encoding.TextUnmarshaler interface by decoding a // payload object which has been encoded into a git commit message. 
func (p *PayloadUnion) UnmarshalText(msg []byte) error { i := bytes.Index(msg, []byte("\n")) if i < 0 { return fmt.Errorf("commit message %q is malformed, it has no body", msg) } msgBody := msg[i:] if err := yaml.Unmarshal(msgBody, p); err != nil { return fmt.Errorf("unmarshaling commit payload from yaml: %w", err) } return nil } // AccreditPayload returns the given PayloadUnion with an appended Credential // provided by the given SignifierInterface. func (proj *Project) AccreditPayload(payUn PayloadUnion, sig sigcred.Signifier) (PayloadUnion, error) { headFS, err := proj.headFS() if err != nil { return payUn, fmt.Errorf("retrieving HEAD fs: %w", err) } cred, err := sig.Sign(headFS, payUn.Common.Fingerprint) if err != nil { return payUn, fmt.Errorf("signing fingerprint %q: %w", payUn.Common.Fingerprint, err) } payUn.Common.Credentials = append(payUn.Common.Credentials, cred) return payUn, nil } // CommitDirectParams are the parameters to the CommitDirect method. All are // required, unless otherwise noted. type CommitDirectParams struct { PayloadUnion PayloadUnion Author string ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q) GitTree *object.Tree } // CommitDirect constructs a git commit object and and stores it, returning the // resulting Commit. This method does not interact with HEAD at all. 
func (proj *Project) CommitDirect(params CommitDirectParams) (Commit, error) { msgB, err := params.PayloadUnion.MarshalText() if err != nil { return Commit{}, fmt.Errorf("encoding payload to message string: %w", err) } author := object.Signature{ Name: params.Author, When: time.Now(), } commit := &object.Commit{ Author: author, Committer: author, Message: string(msgB), TreeHash: params.GitTree.Hash, } if params.ParentHash != plumbing.ZeroHash { commit.ParentHashes = []plumbing.Hash{params.ParentHash} } commitObj := proj.GitRepo.Storer.NewEncodedObject() if err := commit.Encode(commitObj); err != nil { return Commit{}, fmt.Errorf("encoding commit object: %w", err) } commitHash, err := proj.GitRepo.Storer.SetEncodedObject(commitObj) if err != nil { return Commit{}, fmt.Errorf("setting encoded object: %w", err) } return proj.GetCommit(commitHash) } // Commit uses the given PayloadUnion to create a git commit object and commits // it to the current HEAD, returning the full Commit. func (proj *Project) Commit(payUn PayloadUnion) (Commit, error) { headRef, err := proj.TraverseReferenceChain(plumbing.HEAD, func(ref *plumbing.Reference) bool { return ref.Type() == plumbing.HashReference }) if err != nil { return Commit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err) } headRefName := headRef.Name() headHash, err := proj.ReferenceToHash(headRefName) if err != nil { return Commit{}, fmt.Errorf("resolving ref %q (HEAD): %w", headRefName, err) } // TODO this is also used in the same way in NewCommitChange. It might make // sense to refactor this logic out, it might not be needed in fs at all. 
_, stagedTree, err := fs.FromStagedChangesTree(proj.GitRepo) if err != nil { return Commit{}, fmt.Errorf("getting staged changes: %w", err) } commit, err := proj.CommitDirect(CommitDirectParams{ PayloadUnion: payUn, Author: strings.Join(payUn.Common.credIDs(), ", "), ParentHash: headHash, GitTree: stagedTree, }) if err != nil { return Commit{}, err } // now set the branch to this new commit newHeadRef := plumbing.NewHashReference(headRefName, commit.Hash) if err := proj.GitRepo.Storer.SetReference(newHeadRef); err != nil { return Commit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w", headRefName, commit.Hash, err) } return commit, nil } // HasStagedChanges returns true if there are file changes which have been // staged (e.g. via "git add"). func (proj *Project) HasStagedChanges() (bool, error) { w, err := proj.GitRepo.Worktree() if err != nil { return false, fmt.Errorf("retrieving worktree: %w", err) } status, err := w.Status() if err != nil { return false, fmt.Errorf("retrieving worktree status: %w", err) } var any bool for _, fileStatus := range status { if fileStatus.Staging != git.Unmodified && fileStatus.Staging != git.Untracked { any = true break } } return any, nil } // VerifyCommits verifies that the given commits, which are presumably on the // given branch, are gucci. func (proj *Project) VerifyCommits(branchName plumbing.ReferenceName, commits []Commit) error { // this isn't strictly necessary for this method, but it helps discover bugs // in other parts of the code. if len(commits) == 0 { return errors.New("cannot call VerifyCommits with empty commit slice") } // First determine the root of the main branch. All commits need to be an // ancestor of it. If the main branch has not been created yet then there // might not be a root commit yet. var rootCommitObj *object.Commit mainCommit, err := proj.GetCommitByRevision(plumbing.Revision(MainRefName)) if errors.Is(err, plumbing.ErrReferenceNotFound) { // main branch hasn't been created yet. 
The commits can only be verified // if they are for the main branch and they include the root commit. if branchName != MainRefName { return fmt.Errorf("cannot verify commits in branch %q when no main branch exists", branchName) } for _, commit := range commits { if commit.Object.NumParents() == 0 { rootCommitObj = commit.Object break } } if rootCommitObj == nil { return errors.New("root commit of main branch cannot be determined") } } else if err != nil { return fmt.Errorf("retrieving commit at HEAD of %q: %w", MainRefName.Short(), err) } else { rootCommitObj = mainCommit.Object for { if rootCommitObj.NumParents() == 0 { break } else if rootCommitObj.NumParents() > 1 { return fmt.Errorf("commit %q in main branch has more than one parent", rootCommitObj.Hash) } else if rootCommitObj, err = rootCommitObj.Parent(0); err != nil { return fmt.Errorf("retrieving parent commit of %q: %w", rootCommitObj.Hash, err) } } } // We also need the HEAD of the given branch, if it exists. branchCommit, err := proj.GetCommitByRevision(plumbing.Revision(branchName)) if err != nil && !errors.Is(err, plumbing.ErrReferenceNotFound) { return fmt.Errorf("retrieving commit at HEAD of %q: %w", branchName.Short(), err) } for i, commit := range commits { // It's not a requirement that the given Commits are in ancestral order, // but usually they are; if the previous commit is the parent of this // one we can skip a bunch of work. 
var parentTree *object.Tree var isNonFF bool if i > 0 && commits[i-1].Hash == commit.Object.ParentHashes[0] { parentTree = commits[i-1].TreeObject } else if commit.Hash == rootCommitObj.Hash { // looking at the root commit, assume it's ok } else { var err error isAncestor := func(older, younger *object.Commit) bool { var isAncestor bool if err != nil { return false } else if isAncestor, err = older.IsAncestor(younger); err != nil { err = fmt.Errorf("determining if %q is an ancestor of %q: %w", younger.Hash, older.Hash, err) return false } return isAncestor } ancestorOfRoot := isAncestor(rootCommitObj, commit.Object) if branchCommit.Hash != plumbing.ZeroHash { // checking if the var was set // this could only be a nonFF if the branch actually exists. isNonFF = !isAncestor(branchCommit.Object, commit.Object) } if err != nil { return err } else if !ancestorOfRoot { return fmt.Errorf("commit %q must be direct descendant of root commit of %q (%q)", commit.Hash, MainRefName.Short(), rootCommitObj.Hash, ) } } if err := proj.verifyCommit(branchName, commit, parentTree, isNonFF); err != nil { return fmt.Errorf("verifying commit %q: %w", commit.Hash, err) } } return nil } // parentTree returns the tree of the parent commit of the given commit. If the // given commit has no parents then a bare tree is returned. func (proj *Project) parentTree(commitObj *object.Commit) (*object.Tree, error) { switch commitObj.NumParents() { case 0: return new(object.Tree), nil case 1: if parentCommitObj, err := commitObj.Parent(0); err != nil { return nil, fmt.Errorf("getting parent commit %q: %w", commitObj.ParentHashes[0], err) } else if parentTree, err := proj.GitRepo.TreeObject(parentCommitObj.TreeHash); err != nil { return nil, fmt.Errorf("getting parent tree object %q: %w", parentCommitObj.TreeHash, err) } else { return parentTree, nil } default: return nil, errors.New("commit has multiple parents") } } // if parentTree is nil then it will be inferred. 
func (proj *Project) verifyCommit( branchName plumbing.ReferenceName, commit Commit, parentTree *object.Tree, isNonFF bool, ) error { if parentTree == nil { var err error if parentTree, err = proj.parentTree(commit.Object); err != nil { return fmt.Errorf("retrieving parent tree of commit: %w", err) } } var sigFS fs.FS if commit.Object.NumParents() == 0 { sigFS = fs.FromTree(commit.TreeObject) } else { sigFS = fs.FromTree(parentTree) } cfg, err := proj.loadConfig(sigFS) if err != nil { return fmt.Errorf("loading config of parent %q: %w", commit.Object.ParentHashes[0], err) } // assert access controls changedFiles, err := ChangedFilesBetweenTrees(parentTree, commit.TreeObject) if err != nil { return fmt.Errorf("calculating diff from tree %q to tree %q: %w", parentTree.Hash, commit.TreeObject.Hash, err) } else if len(changedFiles) > 0 && commit.Payload.Change == nil { return errors.New("files changes but commit is not a change commit") } pathsChanged := make([]string, len(changedFiles)) for i := range changedFiles { pathsChanged[i] = changedFiles[i].Path } commitType := commit.Payload.Type() err = accessctl.AssertCanCommit(cfg.AccessControls, accessctl.CommitRequest{ Type: commitType, Branch: branchName.Short(), Credentials: commit.Payload.Common.Credentials, FilesChanged: pathsChanged, NonFastForward: isNonFF, }) if err != nil { return fmt.Errorf("asserting access controls: %w", err) } // ensure the fingerprint is what it's expected to be storedFingerprint := commit.Payload.Common.Fingerprint expectedFingerprint, err := commit.Payload.Payload().Fingerprint(changedFiles) if err != nil { return fmt.Errorf("calculating expected payload fingerprint: %w", err) } else if expectedFingerprint == nil { // the payload doesn't have a fingerprint of its own, it's just carrying // one, so no point in checking if it's "correct". 
} else if !bytes.Equal(storedFingerprint, expectedFingerprint) { return fmt.Errorf("unexpected fingerprint in payload, is %q but should be %q", storedFingerprint, yamlutil.Blob(expectedFingerprint)) } // verify all credentials for _, cred := range commit.Payload.Common.Credentials { if cred.AccountID == "" { if err := cred.SelfVerify(storedFingerprint); err != nil { return fmt.Errorf("verifying credential %+v: %w", cred, err) } } else { sig, err := proj.signifierForCredential(sigFS, cred) if err != nil { return fmt.Errorf("finding signifier for credential %+v: %w", cred, err) } else if err := sig.Verify(sigFS, storedFingerprint, cred); err != nil { return fmt.Errorf("verifying credential %+v: %w", cred, err) } } } return nil } // LastChangeDescription iterates over the given commits in reverse order and // returns the first change description it comes across. A change description // may come from a change payload or a credential payload which covers a set of // changes. // // This function will return an error if no given commits contain a change // description. func LastChangeDescription(commits []Commit) (string, error) { for i := range commits { i = len(commits) - 1 - i payUn := commits[i].Payload if payUn.Change != nil { return payUn.Change.Description, nil } else if payUn.Credential != nil && payUn.Credential.ChangeDescription != "" { return payUn.Credential.ChangeDescription, nil } } return "", errors.New("no commits in range contain a change description") } type changeRangeInfo struct { changeCommits []Commit authors map[string]struct{} startTree, endTree *object.Tree changeDescription string } // changeRangeInfo returns various pieces of information about a range of // commits' changes. 
func (proj *Project) changeRangeInfo(commits []Commit) (changeRangeInfo, error) { info := changeRangeInfo{ authors: map[string]struct{}{}, } for _, commit := range commits { if commit.Payload.Change != nil { info.changeCommits = append(info.changeCommits, commit) for _, cred := range commit.Payload.Common.Credentials { info.authors[cred.AccountID] = struct{}{} } } } if len(info.changeCommits) == 0 { return changeRangeInfo{}, errors.New("no change commits found in range") } // startTree has to be the tree of the parent of the first commit, which // isn't included in commits. Determine it the hard way. var err error if info.startTree, err = proj.parentTree(commits[0].Object); err != nil { return changeRangeInfo{}, fmt.Errorf("getting tree of parent of %q: %w", commits[0].Hash, err) } else if info.changeDescription, err = LastChangeDescription(commits); err != nil { return changeRangeInfo{}, err } lastChangeCommit := info.changeCommits[len(info.changeCommits)-1] info.endTree = lastChangeCommit.TreeObject return info, nil } func (info changeRangeInfo) changeFingerprint(descr string) ([]byte, error) { changedFiles, err := ChangedFilesBetweenTrees(info.startTree, info.endTree) if err != nil { return nil, fmt.Errorf("calculating diff of commit trees %q and %q: %w", info.startTree.Hash, info.endTree.Hash, err) } return genChangeFingerprint(nil, descr, changedFiles), nil } // VerifyCanSetBranchHEADTo is used to verify that a branch's HEAD can be set to // the given hash. It verifies any new commits which are being added, and // handles verifying non-fast-forward commits as well. // // If the given hash matches the current HEAD of the branch then this performs // no further checks and returns nil. 
func (proj *Project) VerifyCanSetBranchHEADTo(branchName plumbing.ReferenceName, hash plumbing.Hash) error { oldCommitRef, err := proj.GitRepo.Reference(branchName, true) if errors.Is(err, plumbing.ErrReferenceNotFound) { // if the branch is being created then just pull all of its commits and // verify them. // TODO optimize this so that it tries to use the merge-base with main, // so we're not re-verifying a ton of commits unecessarily commits, err := proj.GetCommitRange(plumbing.ZeroHash, hash) if err != nil { return fmt.Errorf("retrieving %q and all its ancestors: %w", hash, err) } return proj.VerifyCommits(branchName, commits) } else if err != nil { return fmt.Errorf("resolving branch reference to a hash: %w", err) } else if oldCommitRef.Hash() == hash { // if the HEAD is already at the given hash then it must be fine. return nil } oldCommitObj, err := proj.GitRepo.CommitObject(oldCommitRef.Hash()) if err != nil { return fmt.Errorf("retrieving commit object %q: %w", oldCommitRef.Hash(), err) } newCommit, err := proj.GetCommit(hash) if err != nil { return fmt.Errorf("retrieving commit %q: %w", hash, err) } if isAncestor, err := newCommit.Object.IsAncestor(oldCommitObj); err != nil { return fmt.Errorf("determining if %q is an ancestor of %q: %w", newCommit.Hash, oldCommitObj.Hash, err) } else if isAncestor { // if the new commit is an ancestor of the old one then the branch is // being force-pushed to a previous commit. This is weird to handle // using VerifyCommits, so just call verifyCommit directly. 
return proj.verifyCommit(branchName, newCommit, nil, true) } mbCommits, err := oldCommitObj.MergeBase(newCommit.Object) if err != nil { return fmt.Errorf("determining merge-base between %q and %q: %w", oldCommitObj.Hash, newCommit.Hash, err) } else if len(mbCommits) == 0 { return fmt.Errorf("%q and %q have no ancestors in common", oldCommitObj.Hash, newCommit.Hash) } else if len(mbCommits) == 2 { return fmt.Errorf("%q and %q have more than one ancestor in common", oldCommitObj.Hash, newCommit.Hash) } commits, err := proj.GetCommitRange(mbCommits[0].Hash, hash) if err != nil { return fmt.Errorf("retrieving commits %q to %q: %w", mbCommits[0].Hash, hash, err) } return proj.VerifyCommits(branchName, commits) }