From 351048e9aabef7dc0f99b00f02547e409859a33f Mon Sep 17 00:00:00 2001 From: mediocregopher <> Date: Sat, 25 Apr 2020 15:17:21 -0600 Subject: [PATCH] Completely rewrite SPEC --- type: change message: |- Completely rewrite SPEC It's good this time, and complete. After this rewrite it will be necessary to update a lot of the code, since quite a few things got renamed. change_hash: AG0s3yILU+0uIZltVY7A9/cgxr/pXk2MzGwExsY/hbIc credentials: - type: pgp_signature pub_key_id: 95C46FA6A41148AC body: iQIzBAABAgAdFiEEJ6tQKp6olvZKJ0lwlcRvpqQRSKwFAl6kqNsACgkQlcRvpqQRSKztwQ//R3maov9GxmLAAl1jF34ZWqS6et5EmMMDRH8KtD6PB05r9j+Z1etMbaW/dAcXzXcXZpEWmj7xhfR67nyFL5yYEGcBnEUGcEMD9uMOF3QrKrBKmvsDUinIsWQV48cu0R8hY6akqhklYbK/W/S1PXdgBOn+hLuFyoN1Xa9NiHIZSfYwEYrEWE7Pg769lqtEugRv/nIYdO3slEDdubl4bGCf1gt9gowEprod7xaieil8TlKBCFNNR6Eb+GmyhyRBymUR8ANqMUgOzW1hcmRyUFAVSOPraM0Lrze8+nYzvXINqsswp98ZWXz+tOKCHkjRox/HrLsDEhbws4e6S6ngtnFNDEMi9idJH5jYcoH89RLUUggCDbehEF6vfb2En1QbCSIkxR/uAYoQo0pxXMzEdgSjSPNltOvE6I0qWyX9RcSFr/vbVx8C5TDmchZt6CMnlioa27B2SWJxfs5opfHHIHSEQyI4keaDOx6RgU5TfzjBVqCLFjWRiYrxu/ZlNuQDRyTnHKjBnIkhOvcjh78/iBv0Vdkns6R3yE7X9TxKnPs4tfhFOa+ftg2bNjIQS3IN2hLlrsJuRYiImL34zj1s0NbALFVRQCcyY190BnTN3FCqzKTUbriuoo/MeAWW/soPpy9HUhWGh3KdcmqZbfNaosiMIDirwEkPtLYMXHfPH7NfZ9w= account: mediocregopher --- ROADMAP.md | 3 - SPEC.md | 614 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 431 insertions(+), 186 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index c3a6bbd..783c135 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -10,10 +10,7 @@ Must be able to feel good about showing the project publicly, as well as be able to accept help from people asking to help. * Figure out commit range syntax, use that everywhere. -* Create a branch which is just a public "welcome thread", which can be part of - the tutorials. * Tutorials -* Update SPEC; it's sloppy, out-of-date, and incomplete. * Maybe move external host? 
## Milestone: IPFS support diff --git a/SPEC.md b/SPEC.md index bc1a441..462821d 100644 --- a/SPEC.md +++ b/SPEC.md @@ -1,245 +1,493 @@ # SPEC -This document attempts to describe, at minimum, every aspect of the dehub -protocol which would be required to know in order to create an alternate -implementation of dehub. It is currently sloppily written and incomplete. +This document describes the dehub protocol. -## .dehub +This document assumes that the reader is familiar with git, both conceptually +and in practical use of the git tool. All references to a git-specific concept +retain their meaning; dehub concepts build upon git concepts, but do not +override them. -The `.dehub` directory contains all meta information related to -decentralized repository management and access control. +## dehub Project -### config.yml +A dehub project is comprised of: -The `.dehub/config.yml` file takes the following structure: - -```yaml -# accounts defines all accounts which are known to the repo. -accounts: - - # Each account is an object with an id and at least one identifier. The id - # must be unique for each account. - - id: some_user_id: - - # signifiers describes different methods the account might use to - # identify itself. Generally, these will be different public keys which - # commits will be signed with. At least one is required. - signifiers: - - type: "pgp_public_key" - body: "FULL PGP PUBLIC KEY STRING" - - - type: "pgp_public_key_file" - path: ".dehub/some_user_id.asc" - - - type: "keybase" - user: "some_keybase_user_id" - -# access_controls define who may do what in the repo. The value is a list of -# access control objects, each containing an action (allow or deny) and a set of -# filters. If a commit matches all filters (or if there are no filters) then the -# action is taken. If not, then the next access control is attempted. -# -# If no access controls match a commit, then the default list is used, which -# will definitely match. 
The following is the default set, which is enumerated -# here for informational purposes only; it does not normally need to be defined. -access_controls: - - action: allow - filters: - - type: not - filter: - type: branch - pattern: main - - type: signature - any_account: true - count: 1 - - - action: allow - filters: - - type: branch - pattern: main - - type: commit_type - commit_type: change - - type: signature - any_account: true - count: 1 +* A collection of files and directories. - - action: deny -``` +* Meta actions related to those files, e.g. discussion, proposed changes, etc. -## Change Hash +* Configuration defining which meta actions are allowed under which + circumstances. -When a change commit (see Commits section) is being signed by a signifier there -is an expected data format for the data to be signed. The format is a SHA-256 -hash of the following pieces of data concatenated together: +All of these components are housed in a git repository. A dehub project does not +require a central repository location (a "remote"), though it may use one if +desired. -* A uvarint indicating the number of bytes in the commit message. -* The message. -* A uvarint indicating the number of files changed. -* For each file changed in the commit, ordered lexographically-ascending based - on its full relative path within the repo, the following is then written: - * A uvarint indicating the length of the full relative path of the file - within the repo. - * The full relative path of the file within the repo. - * A little-endian uint32 representing the previous file mode of the file (or 0 - if the file is being inserted). - * The 20-byte SHA1 hash of the previous version of the file's contents (or 20 - 0 bytes if the file is being inserted). - * A little-endian uint32 representing the new file mode of the file (or 0 - if the file is being deleted). - * The 20-byte SHA1 hash of the new version of the file's contents (or 20 - 0 bytes if the file is being deleted). 
+## Commit Payload -The raw output from the SHA-256 is then prepended with a `0` byte (for forward -compatibility). The result is the raw change hash. +All commits in a dehub project contain a payload. The payload is encoded into +the commit message as a YAML object. Here is the general structure of a commit +message containing a payload: -## Comment Message Hash +``` +Human readable message head -When a comment commit (see Commits section) is being signed by the signifier of -the author there is an expected data format for the data to be signed, very -similar to how change hashes are signed. The format is a SHA-256 hash of the -following pieces of data concatenated together: +--- +# Three dashes indicate the start of the yaml body. -* A uvarint indicating the number of bytes in the comment message. -* The message. +type: type of the payload # Always required +fingerprint: std-base-64 string # Always required +credentials: [...] # Not required but usually present -The raw output from the SHA-256 is then prepended with a `0` byte (for forward -compatibility). The result is the raw comment hash. +type_specific_field_a: valueA +type_specific_field_b: valueB +``` -## Credentials +The message head is a human readable description of what is being committed, and +is terminated at the first newline. Everything after the message head must be +valid YAML which encodes the payload. -All file changes need to have some kind of credential to be accepted into the -`main` branch (see Main Branch section). Each credential is encoded as a yaml -object with a `type` field. +### Fingerprint -All credentials contain enough information to correspond them to a specific -signifier in the `config.yml`, so as to be able to verify them. +Each payload object contains a `fingerprint` field. The fingerprint is an opaque +byte string encoded using standard base-64.
The algorithm used to generate the +fingerprint will depend on the payload type, and can be found in each type's +sub-section in this document. -### PGP Signature Credential +### Credential -Currently there is only a single credential type, the `pgp_signature`, which -signs a raw change hash (which is communicated out-of-band of the object): +The `credentials` field is not required, but in practice will be found on almost +every payload. The field's value will be an array of credential objects. Only +one credential object is currently supported, `pgp_signature`: -``` +```yaml type: pgp_signature -account_id: some_user_id + +# One of these fields is required. If account_id is present, it relates the +# signature to a pgp_public_key signifier defined for that account in the config +# (see the Signifier sub-section). Otherwise, the public key will be included in +# the credential itself as the value of pub_key_body. +account_id: some_user_id # Optional +pub_key_body: inlined ASCII-armored pgp public key + +# the ID (pgp fingerprint) of the key used to generate the signature pub_key_id: XXX -body: "base-64 signature body" + +# a signature of the payload's unencoded fingerprint, encoded using standard +# base-64 +body: std-base-64 signature ``` -## Commits +### Payload Types -All commit messages in dehub repositories are expected to follow the following -template (newlines included, yaml comments start with `#` and are only for -informational purposes): +#### Change Payload +A change payload encompasses a set of changes to the files in the project. To +construct the change payload one must reference the file tree of the commit +which houses the payload as well as the file tree of its parent commit; +specifically one must take the difference between them. + +A change payload looks like this: + +```yaml +type: change +fingerprint: std-base-64 string +credentials: [...]
+description: |- + The description will generally start with a single line, followed by a long-form body + + The description corresponds to the body of a commit message in a "normal" + git repo. It gives a more-or-less long-form explanation of the changes being + made to the project's files. ``` -Human readable message head ---- -# Three dashes indicate the start of the yaml body. Everything after must be -# valid yaml. +##### Change Payload Fingerprint + +The unencoded fingerprint of a change payload is calculated as follows: + +* Concatenate the following: + * A uvarint indicating the number of bytes in the description string. + * The description string. + * A uvarint indicating the number of files changed between this commit and + its parent. + * For each file changed, ordered lexicographically-ascending based on its full + relative path within the git repo: + * A uvarint indicating the length of the full relative path of the file + within the repo, as a string. + * The full relative path of the file within the repo, as a string. + * A little-endian uint32 representing the previous file mode of the file + (or 0 if the file is not present in the parent commit's tree). + * The 20-byte SHA1 hash of the contents of the previous version of the file + (or 20 0 bytes if the file is not present in the parent commit's tree). + * A little-endian uint32 representing the new file mode of the file (or 0 + if the file is not present in the current commit's tree). + * The 20-byte SHA1 hash of the contents of the new version of the file (or + 20 0 bytes if the file is not present in the current commit's tree). +* Calculate the SHA-256 hash of the concatenation result. +* Prepend a 0 byte to the result of the SHA-256 hash. + +This unencoded fingerprint is then standard base-64 encoded, and that is used as +the value of the fingerprint field. + +#### Comment Payload + +A comment payload encompasses no file changes, and is used only to contain a +comment made by a single user.
+ +A comment payload looks like this: + +```yaml +type: comment +fingerprint: std-base-64 string +credentials: [...] +comment: |- + Hey all, how's it going? -type: type of the commit # Always required -fieldA: valueA -fieldB: valueB + Just wanted to pop by and say howdy. ``` -### Change Commits +The message head of a comment payload will generally be a truncated form of the +comment itself. + +##### Comment Payload Fingerprint + +The unencoded fingerprint of a comment payload is calculated as follows: + +* Concatenate the following: + * A uvarint indicating the number of bytes in the comment string. + * The comment string. +* Calculate the SHA-256 hash of the concatenation result. +* Prepend a 0 byte to the result of the SHA-256 hash. -Commits of type `change` correspond to the standard git commit; they encompass a -set of file changes as well as a message describing the changes which occurred. -They extend the standard git commit with a few dehub specific features, such as -the change hash and credentials. +This unencoded fingerprint is then standard base-64 encoded, and that is used as +the value of the fingerprint field. -`change` commits are, currently, the _only_ commit type which are allowed to -have file changes. +#### Credential Payload -Example change commit message: +A credential payload contains only one or more credentials for an arbitrary +fingerprint. Credential payloads can be combined with other payloads of the same +fingerprint to create a new payload with many credentials. +A credential payload looks like this: + +```yaml +type: credential +fingerprint: std-base-64 string +credentials: [...] + +# This field is not required, but can be helpful in situations where the +# fingerprint was generated based on multiple change payloads +fingerprint_commits: + - commit hash + - commit hash + - commit hash ``` -This is the message head.
It will be re-iterated within the message field +## Project Configuration - The rest of this field is for the message body, which corresponds to the - body of a normal commit message which might give a more long-form - explanation of the commit's changes. +The `.dehub` directory contains all meta information related to the dehub +project. All files within `.dehub` are tracked by the git repo like any other +files in the project. - Since the message is used in generating the signature it's necessary for it - to be encoded here fully formed, even though the message head is then - duplicated. Otherwise the exact bytes of the message would be ambiguous. - This situation is ugly, but not unbearable. +### config.yml -# The change_hash is able to be computed from the commit's message and changed -# files, but is reproduced in the commit message for forward compatibility, e.g. -# if the algorithm to compute the hash changes. -change_hash: XXX +The `.dehub/config.yml` file contains a yaml encoded configuration object: -# Credentials are the set of credentials which indicate approval of the change -credentials: - - type: pgp_signature - account_id: some_user_id - pub_key_id: XXX - body: "base-64 signature body" +```yaml +accounts: [...] +access_controls: [...] ``` -### Credential Commits +Both fields are described in their own sub-section below. -Commits of type `credential` contain one or more credentials for some hash -(presumably a change hash, but in the future there may be other types). The -commit message head is not spec'd, but should be a human-readable description of -"who is crediting what, and how". +#### Account -Example credential commit message: +An account defines a specific user of the project. Every account has an ID; no +two accounts within a project may share the same ID. +An account looks like this: + +```yaml +id: some_string +signifiers: [...] 
``` -some_user_id pgp sig of commits AAA..BBB with key CCC ---- -type: credential -credentialed_hash: XXX -credentials: - - type: pgp_signature - account_id: some_user_id - pub_key_id: CCC - body: "base-64 signature body" +##### Signifier + +A signifier is used to signify that an account has taken some action. The most +common use-case is to prove that an account created a particular credential. An +account may have more than one signifier. + +Currently there is only one signifier type, `pgp_public_key`: + +```yaml +type: pgp_public_key + +# Path to ASCII-armored pgp public key, relative to repo root. +path: .dehub/account.asc ``` -### Comment Commits +or + +```yaml +type: pgp_public_key +body: inlined ASCII-armored pgp public key +``` -Commits of type `comment` contain a message for others to read. The commit -message head is not spec'd, but should be a human-readable description of "who -is commenting what". +#### Access Control -Example credential commit message: +An access control allows or denies a particular commit from becoming a part of +the project. Each access control has an action (allow or deny) and a set of +filters: +```yaml +action: allow # or deny +filters: [...] ``` -some_user_id has commented: Hey all, how's it going? ---- -type: comment +When verifying a commit against a project's access controls, each access +control's filters are applied to the commit in the order they appear in the +configuration. The first access control for which all filters match is found, +and its action is taken. -# The message_hash is computed from the message, and reproduced here for -# forwards compatibility. See the Comment Message Hash section. -message_hash: XXX -message: > - Hey all, how's it going? +An access control with no filters matches all commits. - Just wanted to pop by and say howdy. +##### Filters + +There are many kinds of access control filters. Any filter can be applied to a +commit, with no other input, and produce a boolean value.
All filters have a +`type` field which indicates their type. + +###### Signature Filter + +A filter of type `signature` asserts that a commit's payload contains signature +credentials with certain properties. A signature filter must have one of these +fields, which define the set of users or accounts whose signatures are +applicable. + +* `account_ids: [...]` - an array of account IDs, each having been defined in the + accounts section of the configuration. + +* `any_account: true` - matches any account defined in the accounts section of + the configuration. + +* `any: true` - matches any signature, whether or not its signifier has been + defined in the configuration. + +A `count` field may also be included. Its value may be a number or a string +indicating a percent (e.g. `"50%"`). If not included it will be assumed to be +`1`. -# credentials can contain a signature from the author of this comment's -# message_hash. -credentials: - - type: pgp_signature - account_id: some_user_id - pub_key_id: CCC - body: "base-64 signature body" +The count indicates how many accounts from the specified set must have a +signature included. If a percent is given then that will be multiplied against +the size of the set (rounded up) to determine the necessary number. + +Here are some example signature filters, and explanations for each: + +```yaml +# requires that 2 of the 3 specified accounts have a signature credential on +# the commit. +type: signature +account_ids: + - amy + - bill + - colleen +count: 2 ``` -## TODO +```yaml +# requires that every account defined in the configuration has a signature +# credential on the commit. +type: signature +any_account: true +count: "100%" +``` + +```yaml +# requires at least one signature credential, not necessarily from an account. +type: signature +any: true +``` + +###### Branch Filter + +A filter of type `branch` matches the commit based on which branch in the repo +it is being or has been committed to.
Matching is performed on the short name +of the branch, using globstar pattern matching. + +A branch filter can have one or multiple patterns defined. The filter will match +if at least one defined pattern matches the short form of the branch name. + +A branch filter with only one pattern can be defined like this: + +```yaml +type: branch +pattern: some_branch +``` + +A branch filter with multiple patterns can be defined like this: + +```yaml +type: branch +patterns: + - some_branch + - branch*glob + - amy/** +``` + +###### Files Changed Filter + +A filter of type `files_changed` matches the commit based on which files were +changed between the tree of the commit's parent and the commit's tree. Matching +is performed on the paths of the changed files, relative to the repo root. + +A files changed filter can have one or multiple patterns defined. The filter +will match if at least one defined pattern matches for every file changed. + +(TODO this may change to be: `The filter will match if any of the changed files +matches at least one defined pattern.`) + +A files changed filter with only one pattern can be defined like this: + +```yaml +type: files_changed +pattern: .dehub/* +``` + +A files changed filter with multiple patterns can be defined like this: + +```yaml +type: files_changed +patterns: + - some/dir/* + - foo_files_* + - "**.jpg" +``` + +###### Payload Type Filter + +A filter of type `payload_type` matches a commit based on the type of its +payload. A payload type filter can have one or more types defined. The filter +will match if the commit's payload type matches at least one of the defined +types.
+ +A payload type filter with only one matching type can be defined like this: + +```yaml +type: payload_type +payload_type: comment +``` + +A payload type filter with multiple matching types can be defined like this: + +```yaml +type: payload_type +payload_types: + - comment + - change +``` + +###### Commit Attributes Filter + +A filter of type `commit_attributes` matches a commit based on certain +attributes it has. A commit attributes filter may have one or more fields +defined, each corresponding to a different attribute the commit may have. If +more than one field is defined then all corresponding attributes on the commit +must match for the filter to match. + +Currently the only possible attribute is `non_fast_forward: true`, which matches +a commit which is not a descendant of the HEAD of the branch it's being pushed +onto. This attribute only makes sense in the context of a pre-receive git hook. + +A commit attributes filter looks like this: + +```yaml +type: commit_attributes +non_fast_forward: true +``` + +###### Not Filter + +A filter of type `not` matches a commit using the negation of a sub-filter, +defined within the not filter. If the sub-filter returns true for the commit, +then the not filter returns false, and vice-versa. + +A not filter looks like this: + +```yaml +type: not +filter: + # a branch filter is used as the sub-filter in this example + type: branch + pattern: main +``` + +##### Default Access Controls + +These access controls will be implicitly appended to the list defined in the +configuration: + +```yaml +# Any account may add any commit to any non-main branch, provided there is at +# least one signature credential. This includes non-fast-forwards. +- action: allow + filters: + - type: not + filter: + type: branch + pattern: main + - type: signature + any_account: true + count: 1 + +# Non-fast-forwards are denied in all other cases. In effect, one cannot +# force-push onto the main branch.
+- action: deny + filters: + - type: commit_attributes + non_fast_forward: true + +# Any account may add any change commit to the main branch, provided there is +# at least one signature credential. +- action: allow + filters: + - type: branch + pattern: main + - type: payload_type + payload_type: change + - type: signature + any_account: true + count: 1 + +# All other actions are denied. +- action: deny +``` + +These default access controls provide a useful baseline of requirements that all +projects will (hopefully) find useful in their infancy. + +## Commit Verification + +The dehub protocol is designed such that every commit is "verifiable". A +verifiable commit has the following properties: + +* Its fingerprint is correctly formed. +* All of its credentials are correctly formed. + * If they are signatures, they are valid signatures of the commit's + unencoded fingerprint. +* The project's access controls allow the commit. + +The project's configuration is referenced frequently when verifying a commit, +such as when determining which access controls to apply and discovering +signifiers of accounts. In all cases the configuration as defined in the +commit's _parent_ is used when verifying that commit. The exception is the prime +commit, which uses its own configuration. + +### Prime Commit + +The prime commit is the trusted seed of the project. When a user clones and +verifies a dehub project they must, implicitly or explicitly, trust the contents +of the prime commit. All other commits must be descendants of the prime commit. + +Manually specifying a prime commit is not currently spec'd, but it will be. -* Access controls -* Update credential commit section +By default the prime commit is the root commit of the `main` branch.