package dehub

import (
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"
	"reflect"
	"sort"
	"strings"
	"time"
	"unicode/utf8"

	"dehub.dev/src/dehub.git/accessctl"
	"dehub.dev/src/dehub.git/fs"
	"dehub.dev/src/dehub.git/sigcred"
	"dehub.dev/src/dehub.git/typeobj"

	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	yaml "gopkg.in/yaml.v2"
)

// CommitInterface describes the methods which must be implemented by the
// different commit types. None of the methods should modify the underlying
// object.
type CommitInterface interface {
	// MessageHead returns the head of the commit message (i.e. the first line).
	// The CommitCommon of the outer Commit is passed in for added context, if
	// necessary.
	MessageHead(CommitCommon) (string, error)

	// ExpectedHash returns the raw hash which Signifiers can sign to accredit
	// this commit. The ChangedFile objects given describe the file changes
	// between the parent commit and this commit.
	ExpectedHash([]ChangedFile) ([]byte, error)

	// StoredHash returns the signable Hash embedded in the commit, which should
	// hopefully correspond to the ExpectedHash.
	StoredHash() []byte
}

// CommitCommon describes the fields common to all Commit objects.
type CommitCommon struct {
	// Credentials represent all created Credentials for this commit, and can be
	// set on all Commit objects regardless of other fields being set.
	Credentials []sigcred.Credential `yaml:"credentials"`
}

// credIDs returns the de-duplicated, lexically sorted identifiers of the
// commit's credentials. For each credential the AccountID is used if it is
// non-empty, otherwise the AnonID; credentials with neither are skipped.
func (cc CommitCommon) credIDs() []string {
	seen := make(map[string]struct{}, len(cc.Credentials))
	for _, cred := range cc.Credentials {
		switch {
		case cred.AccountID != "":
			seen[cred.AccountID] = struct{}{}
		case cred.AnonID != "":
			seen[cred.AnonID] = struct{}{}
		}
	}

	ids := make([]string, 0, len(seen))
	for id := range seen {
		ids = append(ids, id)
	}
	// Map iteration order is random, sort for deterministic output.
	sort.Strings(ids)
	return ids
}

// abbrevCommitMessage returns a shortened, single-line form of msg: everything
// from the first newline onwards is dropped, and if the remainder is longer
// than 80 bytes it is cut to at most 80 bytes and suffixed with "...".
//
// The cut never lands in the middle of a multi-byte UTF-8 sequence, so the
// result of abbreviating valid UTF-8 is itself valid UTF-8.
func abbrevCommitMessage(msg string) string {
	const maxLen = 80

	// A message which starts with a newline (index 0) is deliberately left
	// alone here rather than being reduced to an empty string.
	if i := strings.Index(msg, "\n"); i > 0 {
		msg = msg[:i]
	}

	if len(msg) > maxLen {
		cut := maxLen
		// Back up to the start of the rune which straddles the limit.
		for cut > 0 && !utf8.RuneStart(msg[cut]) {
			cut--
		}
		msg = msg[:cut] + "..."
	}
	return msg
}

// Commit represents a single Commit which is being added to a branch.
Only one // field should be set on a Commit, unless otherwise noted. type Commit struct { Change *CommitChange `type:"change,default"` Credential *CommitCredential `type:"credential"` Comment *CommitComment `type:"comment"` Common CommitCommon `yaml:",inline"` } // MarshalYAML implements the yaml.Marshaler interface. func (c Commit) MarshalYAML() (interface{}, error) { return typeobj.MarshalYAML(c) } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *Commit) UnmarshalYAML(unmarshal func(interface{}) error) error { return typeobj.UnmarshalYAML(c, unmarshal) } // Interface returns the CommitInterface instance encapsulated by this Commit // object. func (c Commit) Interface() (CommitInterface, error) { el, _, err := typeobj.Element(c) if err != nil { return nil, err } return el.(CommitInterface), nil } // Type returns the Commit's type (as would be used in its YAML "type" field). func (c Commit) Type() (string, error) { _, typeStr, err := typeobj.Element(c) if err != nil { return "", err } return typeStr, nil } // MarshalText implements the encoding.TextMarshaler interface by returning the // form the Commit object takes in the git commit message. func (c Commit) MarshalText() ([]byte, error) { commitInt, err := c.Interface() if err != nil { return nil, fmt.Errorf("could not cast Commit %+v to interface : %w", c, err) } msgHead, err := commitInt.MessageHead(c.Common) if err != nil { return nil, fmt.Errorf("error constructing message head: %w", err) } msgBodyB, err := yaml.Marshal(c) if err != nil { return nil, fmt.Errorf("error marshaling commit %+v as yaml: %w", c, err) } w := new(bytes.Buffer) w.WriteString(msgHead) w.WriteString("\n\n---\n") w.Write(msgBodyB) return w.Bytes(), nil } // UnmarshalText implements the encoding.TextUnmarshaler interface by decoding a // Commit object which has been encoded into a git commit message. 
func (c *Commit) UnmarshalText(msg []byte) error { i := bytes.Index(msg, []byte("\n")) if i < 0 { return fmt.Errorf("commit message %q is malformed, it has no body", msg) } msgBody := msg[i:] if err := yaml.Unmarshal(msgBody, c); err != nil { return fmt.Errorf("could not unmarshal Commit message from yaml: %w", err) } else if reflect.DeepEqual(*c, Commit{}) { // a basic check, but worthwhile return errors.New("commit message is malformed, could not unmarshal yaml object") } return nil } // AccreditCommit returns the given Commit with an appended Credential provided // by the given SignifierInterface. func (r *Repo) AccreditCommit(commit Commit, sigInt sigcred.SignifierInterface) (Commit, error) { commitInt, err := commit.Interface() if err != nil { return commit, fmt.Errorf("could not cast commit %+v to interface: %w", commit, err) } headFS, err := r.headFS() if err != nil { return commit, fmt.Errorf("could not grab snapshot of HEAD fs: %w", err) } cred, err := sigInt.Sign(headFS, commitInt.StoredHash()) if err != nil { return commit, fmt.Errorf("could not accredit change commit: %w", err) } commit.Common.Credentials = append(commit.Common.Credentials, cred) return commit, nil } // CommitBareParams are the parameters to the CommitBare method. All are // required, unless otherwise noted. type CommitBareParams struct { Commit Commit Author string ParentHash plumbing.Hash // can be zero if the commit has no parents (Q_Q) GitTree *object.Tree } // CommitBare constructs a git commit object and and stores it, returning the // resulting GitCommit. This method does not interact with HEAD at all. 
func (r *Repo) CommitBare(params CommitBareParams) (GitCommit, error) { msgB, err := params.Commit.MarshalText() if err != nil { return GitCommit{}, fmt.Errorf("encoding %T to message string: %w", params.Commit, err) } author := object.Signature{ Name: params.Author, When: time.Now(), } commit := &object.Commit{ Author: author, Committer: author, Message: string(msgB), TreeHash: params.GitTree.Hash, } if params.ParentHash != plumbing.ZeroHash { commit.ParentHashes = []plumbing.Hash{params.ParentHash} } commitObj := r.GitRepo.Storer.NewEncodedObject() if err := commit.Encode(commitObj); err != nil { return GitCommit{}, fmt.Errorf("encoding commit object: %w", err) } commitHash, err := r.GitRepo.Storer.SetEncodedObject(commitObj) if err != nil { return GitCommit{}, fmt.Errorf("setting encoded object: %w", err) } return r.GetGitCommit(commitHash) } // Commit uses the given Commit to create a git commit object and commits it to // the current HEAD, returning the full GitCommit. func (r *Repo) Commit(commit Commit) (GitCommit, error) { headRef, err := r.TraverseReferenceChain(plumbing.HEAD, func(ref *plumbing.Reference) bool { return ref.Type() == plumbing.HashReference }) if err != nil { return GitCommit{}, fmt.Errorf("resolving HEAD to a hash reference: %w", err) } headRefName := headRef.Name() headHash, err := r.ReferenceToHash(headRefName) if err != nil { return GitCommit{}, fmt.Errorf("resolving ref %q (HEAD): %w", headRefName, err) } // TODO this is also used in the same way in NewCommitChange. It might make // sense to refactor this logic out, it might not be needed in fs at all. 
_, stagedTree, err := fs.FromStagedChangesTree(r.GitRepo) if err != nil { return GitCommit{}, fmt.Errorf("getting staged changes: %w", err) } gitCommit, err := r.CommitBare(CommitBareParams{ Commit: commit, Author: strings.Join(commit.Common.credIDs(), ", "), ParentHash: headHash, GitTree: stagedTree, }) if err != nil { return GitCommit{}, err } // now set the branch to this new commit newHeadRef := plumbing.NewHashReference(headRefName, gitCommit.GitCommit.Hash) if err := r.GitRepo.Storer.SetReference(newHeadRef); err != nil { return GitCommit{}, fmt.Errorf("setting reference %q to new commit hash %q: %w", headRefName, gitCommit.GitCommit.Hash, err) } return gitCommit, nil } // HasStagedChanges returns true if there are file changes which have been // staged (e.g. via "git add"). func (r *Repo) HasStagedChanges() (bool, error) { w, err := r.GitRepo.Worktree() if err != nil { return false, fmt.Errorf("error retrieving worktree: %w", err) } status, err := w.Status() if err != nil { return false, fmt.Errorf("error retrieving worktree status: %w", err) } var any bool for _, fileStatus := range status { if fileStatus.Staging != git.Unmodified && fileStatus.Staging != git.Untracked { any = true break } } return any, nil } // VerifyCommits verifies that the given commits, which are presumably on the // given branch, are gucci. func (r *Repo) VerifyCommits(branch plumbing.ReferenceName, gitCommits []GitCommit) error { // First determine the root of the main branch. All commits need to be an // ancestor of it. 
var root plumbing.Hash mainGitCommit, err := r.GetGitRevision(plumbing.Revision(MainRefName)) if err != nil { return fmt.Errorf("retrieving commit at HEAD of main: %w", err) } rootCommit := mainGitCommit.GitCommit for { if rootCommit.NumParents() == 0 { break } else if rootCommit.NumParents() > 1 { return fmt.Errorf("commit %q in main branch has more than one parent", root) } else if rootCommit, err = rootCommit.Parent(0); err != nil { return fmt.Errorf("retrieving parent commit of %q: %w", root, err) } } for i, gitCommit := range gitCommits { // It's not a requirement that the given GitCommits are in ancestral // order, but usually they are, so we can help verifyCommit not have to // calculate the parentTree if the previous commit is the parent of this // one, and not have to determine that each commit is an ancestor of // main manually. var parentTree *object.Tree if i > 0 && gitCommits[i-1].GitCommit.Hash == gitCommit.GitCommit.ParentHashes[0] { parentTree = gitCommits[i-1].GitTree } else if gitCommit.GitCommit.Hash == rootCommit.Hash { // looking at the root commit itself, assume it's ok } else if isAncestor, err := rootCommit.IsAncestor(gitCommit.GitCommit); err != nil { return fmt.Errorf("determining if %q is an ancestor of %q (root of main): %w", gitCommit.GitCommit.Hash, rootCommit.Hash, err) } else if !isAncestor { return fmt.Errorf("%q is not an ancestor of %q (root of main)", gitCommit.GitCommit.Hash, rootCommit.Hash) } if err := r.verifyCommit(branch, gitCommit, parentTree); err != nil { return fmt.Errorf("verifying commit %q: %w", gitCommit.GitCommit.Hash, err) } } return nil } // parentTree returns the tree of the parent commit of the given commit. If the // given commit has no parents then a bare tree is returned. 
func (r *Repo) parentTree(commitObj *object.Commit) (*object.Tree, error) { switch commitObj.NumParents() { case 0: return new(object.Tree), nil case 1: if parentCommitObj, err := commitObj.Parent(0); err != nil { return nil, fmt.Errorf("getting parent commit %q: %w", commitObj.ParentHashes[0], err) } else if parentTree, err := r.GitRepo.TreeObject(parentCommitObj.TreeHash); err != nil { return nil, fmt.Errorf("getting parent tree object %q: %w", parentCommitObj.TreeHash, err) } else { return parentTree, nil } default: return nil, errors.New("commit has multiple parents") } } // if parentTree is nil then it will be inferred. func (r *Repo) verifyCommit(branch plumbing.ReferenceName, gitCommit GitCommit, parentTree *object.Tree) error { parentTree, err := r.parentTree(gitCommit.GitCommit) if err != nil { return fmt.Errorf("retrieving parent tree of commit: %w", err) } var sigFS fs.FS if gitCommit.Root() { sigFS = fs.FromTree(gitCommit.GitTree) } else { sigFS = fs.FromTree(parentTree) } cfg, err := r.loadConfig(sigFS) if err != nil { return fmt.Errorf("loading config of parent %q: %w", gitCommit.GitCommit.ParentHashes[0], err) } // assert access controls changedFiles, err := ChangedFilesBetweenTrees(parentTree, gitCommit.GitTree) if err != nil { return fmt.Errorf("calculating diff from tree %q to tree %q: %w", parentTree.Hash, gitCommit.GitTree.Hash, err) } else if len(changedFiles) > 0 && gitCommit.Commit.Change == nil { return errors.New("files changes but commit is not a change commit") } pathsChanged := make([]string, len(changedFiles)) for i := range changedFiles { pathsChanged[i] = changedFiles[i].Path } commitType, err := gitCommit.Commit.Type() if err != nil { return fmt.Errorf("determining type of commit %+v: %w", gitCommit.Commit, err) } err = accessctl.AssertCanCommit(cfg.AccessControls, accessctl.CommitRequest{ Type: commitType, Branch: branch.Short(), Credentials: gitCommit.Commit.Common.Credentials, FilesChanged: pathsChanged, }) if err != nil { return 
fmt.Errorf("asserting access controls: %w", err) } // ensure the hash is what it's expected to be storedCommitHash := gitCommit.Interface.StoredHash() expectedCommitHash, err := gitCommit.Interface.ExpectedHash(changedFiles) if err != nil { return fmt.Errorf("calculating expected commit hash: %w", err) } else if !bytes.Equal(storedCommitHash, expectedCommitHash) { return fmt.Errorf("unexpected hash in commit body, is %s but should be %s", base64.StdEncoding.EncodeToString(storedCommitHash), base64.StdEncoding.EncodeToString(expectedCommitHash)) } // verify all credentials for _, cred := range gitCommit.Commit.Common.Credentials { if cred.AccountID == "" { if err := cred.SelfVerify(expectedCommitHash); err != nil { return fmt.Errorf("verifying credential %+v: %w", cred, err) } } else { sig, err := r.signifierForCredential(sigFS, cred) if err != nil { return fmt.Errorf("finding signifier for credential %+v: %w", cred, err) } else if err := sig.Verify(sigFS, expectedCommitHash, cred); err != nil { return fmt.Errorf("verifying credential %+v: %w", cred, err) } } } return nil } type changeRangeInfo struct { changeCommits []GitCommit authors map[string]struct{} msg string startTree, endTree *object.Tree changeHash []byte } // changeRangeInfo returns various pieces of information about a range of // commits' changes. func (r *Repo) changeRangeInfo(commits []GitCommit) (changeRangeInfo, error) { info := changeRangeInfo{ authors: map[string]struct{}{}, } for _, commit := range commits { if _, ok := commit.Interface.(*CommitChange); ok { info.changeCommits = append(info.changeCommits, commit) for _, cred := range commit.Commit.Common.Credentials { info.authors[cred.AccountID] = struct{}{} } } } if len(info.changeCommits) == 0 { return changeRangeInfo{}, errors.New("no change commits found") } // startTree has to be the tree of the parent of the first commit, which // isn't included in commits. Determine it the hard way. 
var err error if info.startTree, err = r.parentTree(commits[0].GitCommit); err != nil { return changeRangeInfo{}, fmt.Errorf("getting tree of parent of %q: %w", commits[0].GitCommit.Hash, err) } lastChangeCommit := info.changeCommits[len(info.changeCommits)-1] info.msg = lastChangeCommit.Commit.Change.Message info.endTree = lastChangeCommit.GitTree changedFiles, err := ChangedFilesBetweenTrees(info.startTree, info.endTree) if err != nil { return changeRangeInfo{}, fmt.Errorf("calculating diff of commit trees %q and %q: %w", info.startTree.Hash, info.endTree.Hash, err) } info.changeHash = genChangeHash(nil, info.msg, changedFiles) return info, nil }