Implement Decoder

The decoder basically works, though there are some quirks in the design
I'll need to marinate on. For example, you can't have a tuple as an
edge value. This is probably fine?

Stringification of Graphs was added to aid in debugging the decoder; the
format it outputs is not the final one. Most likely the (future) encoder
will be used for that purpose.

The decoder is not implemented in the nicest way; it fully reads in the
LexerTokens first, and then processes. This made trying to wrap my head
around the problem a lot easier because it left fewer failure cases, but
it's not the most efficient thing to do.

Now that v0 is done it's pretty plain to see that the decoder could work
by only reading in the next N tokens that it needs at a time. But that
will be left for a future version.
This commit is contained in:
Brian Picciano 2021-12-26 16:23:41 -07:00
parent 82e74cb55f
commit 33e59a3836
7 changed files with 579 additions and 25 deletions

318
gg/decoder.go Normal file
View File

@ -0,0 +1,318 @@
package gg
import (
"errors"
"fmt"
"io"
"strconv"
)
// Punctuation token values recognized by the decoder for the gg file format.
const (
	// punctAssign sits between a destination name and the edge flowing into
	// it, as in "<name> = ...".
	punctAssign = "="
	// punctOp chains a value onto the edge which follows it.
	punctOp = "<"
	// punctTerm terminates a value or a chain of values.
	punctTerm = ";"

	// punctOpenGraph and punctCloseGraph wrap a graph which is used as a
	// value.
	punctOpenGraph  = "{"
	punctCloseGraph = "}"

	// punctOpenTuple and punctCloseTuple wrap the elements of a tuple.
	punctOpenTuple  = "("
	punctCloseTuple = ")"
)
// decoderErr prefixes err with the row and column of tok, in the form
// "row:col: err". The given error is wrapped using %w, so it can still be
// inspected using errors.Is and errors.As.
func decoderErr(tok LexerToken, err error) error {
	row, col := tok.Row, tok.Col
	return fmt.Errorf("%d:%d: %w", row, col, err)
}
// decoderErrf builds an error from the format string and arguments, in the
// manner of fmt.Errorf, and then annotates it with the position of tok via
// decoderErr.
func decoderErrf(tok LexerToken, str string, args ...interface{}) error {
	cause := fmt.Errorf(str, args...)
	return decoderErr(tok, cause)
}
// isPunct returns true if tok is a punctuation token whose value is exactly
// val.
func isPunct(tok LexerToken, val string) bool {
	if tok.Kind != LexerTokenKindPunctuation {
		return false
	}
	return tok.Value == val
}
// isTerm returns true if tok is the terminating punctuation (punctTerm).
func isTerm(tok LexerToken) bool {
	return tok.Kind == LexerTokenKindPunctuation && tok.Value == punctTerm
}
// decoder is currently only really used to namespace functions related to
// decoding Graphs. It may later have actual fields added to it, such as for
// options passed by the caller.
//
// It has no fields, so the zero value is ready to use.
type decoder struct{}
// parseSingleValue consumes the first token of toks, which must be a name or
// a number, and returns it as a Value along with the tokens left over. toks
// must not be empty.
//
// The returned boolean indicates whether the consumed token was directly
// followed by a term. If so, the term is consumed as well and is not part of
// the leftover tokens. If not, the leftover tokens are guaranteed to be
// non-empty.
//
// An error is returned if the token is the last one in toks, if it is
// punctuation, or if it is a number which can't be parsed as a base-10 int64.
func (d *decoder) parseSingleValue(
	toks []LexerToken,
) (
	Value, []LexerToken, bool, error,
) {
	first, remaining := toks[0], toks[1:]

	// A value must always be followed by something, even if only a term.
	if len(remaining) == 0 {
		return Value{}, nil, false, decoderErrf(first, "cannot be final token, possibly missing %q", punctTerm)
	}

	termed := isTerm(remaining[0])
	if termed {
		remaining = remaining[1:]
	}

	var val Value

	switch first.Kind {
	case LexerTokenKindName:
		val.Name = &first.Value

	case LexerTokenKindNumber:
		num, err := strconv.ParseInt(first.Value, 10, 64)
		if err != nil {
			return Value{}, nil, false, decoderErrf(first, "parsing %q as integer: %w", first.Value, err)
		}
		val.Number = &num

	case LexerTokenKindPunctuation:
		return Value{}, nil, false, decoderErrf(first, "expected value, found punctuation %q", first.Value)

	default:
		panic(fmt.Sprintf("unexpected token kind %q", first.Kind))
	}

	return val, remaining, termed, nil
}
// parseOpenEdge parses a single edge expression from the front of toks and
// returns the resulting OpenEdge along with the tokens left over. toks must
// not be empty.
//
// An edge expression is either a tuple, or a value (a name, a number, or a
// graph) which is either terminated or followed by punctOp and a further edge
// expression. In the latter case the value is attached as the edge value of
// the edge which follows it.
func (d *decoder) parseOpenEdge(
	toks []LexerToken,
) (
	OpenEdge, []LexerToken, error,
) {
	var (
		lhs    Value
		termed bool
		err    error
	)

	switch head := toks[0]; {
	case isPunct(head, punctOpenTuple):
		// tuples are handled completely by parseTuple, nothing may be
		// chained after them.
		return d.parseTuple(toks)
	case isPunct(head, punctOpenGraph):
		lhs, toks, termed, err = d.parseGraphValue(toks, true)
	default:
		lhs, toks, termed, err = d.parseSingleValue(toks)
	}

	if err != nil {
		return OpenEdge{}, nil, err
	} else if termed {
		// a terminated value stands on its own, with no edge value.
		return ValueOut(lhs, Value{}), toks, nil
	}

	// since the value wasn't terminated there's at least one more token, and
	// that token has to be the op.
	opTok := toks[0]
	if !isPunct(opTok, punctOp) {
		return OpenEdge{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
	}

	tail := toks[1:]
	if len(tail) == 0 {
		return OpenEdge{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
	}

	inner, tail, err := d.parseOpenEdge(tail)
	if err != nil {
		return OpenEdge{}, nil, err
	}

	return TupleOut([]OpenEdge{inner}, lhs), tail, nil
}
// parseTuple parses a tuple from the front of toks, the first of which is
// taken to be the punctOpenTuple token. Edge expressions are parsed one after
// the other until the matching punctCloseTuple is found. The tuple is
// returned as an OpenEdge without an edge value, along with the tokens left
// over.
//
// An error is returned if the tokens run out before the tuple is closed.
func (d *decoder) parseTuple(
	toks []LexerToken,
) (
	OpenEdge, []LexerToken, error,
) {
	openTok := toks[0]
	toks = toks[1:]

	var elems []OpenEdge

	for {
		if len(toks) == 0 {
			return OpenEdge{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
		}

		if isPunct(toks[0], punctCloseTuple) {
			toks = toks[1:]
			break
		}

		elem, rest, err := d.parseOpenEdge(toks)
		if err != nil {
			return OpenEdge{}, nil, err
		}

		elems = append(elems, elem)
		toks = rest
	}

	// A term following the tuple is optional. This is a quirk of the syntax:
	// a tuple can't be used as an edge value, so nothing can follow it in a
	// chain, and so the term isn't technically needed.
	if len(toks) > 0 && isTerm(toks[0]) {
		toks = toks[1:]
	}

	return TupleOut(elems, Value{}), toks, nil
}
// parseGraphValue parses a sequence of "<name> = ..." declarations into a
// Graph, and returns that Graph wrapped in a Value along with the tokens left
// over.
//
// If expectWrappers is true then the first token is taken to be
// punctOpenGraph, and parsing continues until the matching punctCloseGraph.
// The closing token may not be the final token. The returned boolean
// indicates whether a term directly followed the closing token; if so the
// term is not included in the leftover tokens, and if not the leftover tokens
// are guaranteed to be non-empty.
//
// If expectWrappers is false then parsing continues until the tokens run out,
// any punctCloseGraph encountered is an error, and the returned boolean is
// always true.
func (d *decoder) parseGraphValue(
	toks []LexerToken, expectWrappers bool,
) (
	Value, []LexerToken, bool, error,
) {
	var openTok LexerToken
	if expectWrappers {
		openTok, toks = toks[0], toks[1:]
	}

	g := ZeroGraph

	for {
		if len(toks) == 0 {
			if expectWrappers {
				return Value{}, nil, false, decoderErrf(openTok, "no matching %q", punctCloseGraph)
			}

			// running out of tokens is how an unwrapped graph ends.
			return Value{Graph: g}, toks, true, nil
		}

		if head := toks[0]; isPunct(head, punctCloseGraph) {
			if !expectWrappers {
				return Value{}, nil, false, decoderErrf(head, "unexpected %q", punctCloseGraph)
			}

			toks = toks[1:]
			if len(toks) == 0 {
				return Value{}, nil, false, decoderErrf(head, "cannot be final token, possibly missing %q", punctTerm)
			}

			termed := isTerm(toks[0])
			if termed {
				toks = toks[1:]
			}

			return Value{Graph: g}, toks, termed, nil
		}

		var err error
		if g, toks, err = d.parseValIn(g, toks); err != nil {
			return Value{}, nil, false, err
		}
	}
}
// parseValIn parses a single declaration of the form "<name> = ..." from the
// front of toks. The edge expression to the right of punctAssign is added to
// the given Graph as an edge into the named value, and the resulting Graph is
// returned along with the tokens left over.
//
// If toks is empty then the given Graph is returned as-is.
func (d *decoder) parseValIn(into *Graph, toks []LexerToken) (*Graph, []LexerToken, error) {
	switch n := len(toks); {
	case n == 0:
		return into, nil, nil
	case n < 3:
		// a name, an assign, and at least one token of edge are required.
		return nil, nil, decoderErrf(toks[0], `must be of the form "<name> = ..."`)
	}

	nameTok, assignTok, rest := toks[0], toks[1], toks[2:]

	if nameTok.Kind != LexerTokenKindName {
		return nil, nil, decoderErrf(nameTok, "must be a name")
	}

	if !isPunct(assignTok, punctAssign) {
		return nil, nil, decoderErrf(assignTok, "must be %q", punctAssign)
	}

	edge, rest, err := d.parseOpenEdge(rest)
	if err != nil {
		return nil, nil, err
	}

	return into.AddValueIn(edge, Value{Name: &nameTok.Value}), rest, nil
}
// decode reads every token from the Lexer, up until it returns io.EOF, and
// then parses those tokens as an unwrapped graph. Any other error returned by
// the Lexer is wrapped and returned.
func (d *decoder) decode(lexer Lexer) (*Graph, error) {
	var toks []LexerToken

	// The full set of tokens is buffered prior to any parsing being done.
	for {
		tok, err := lexer.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return nil, fmt.Errorf("reading next token: %w", err)
		}

		toks = append(toks, tok)
	}

	val, _, _, err := d.parseGraphValue(toks, false)
	if err != nil {
		return nil, err
	}

	return val.Graph, nil
}
// DecodeLexer reads lexical tokens from the given Lexer and uses them to
// construct a Graph according to the rules of the gg file format. DecodeLexer
// will only return an error if a non-EOF error is returned from the Lexer, or
// if the tokens which were read cannot be used to construct a valid Graph.
func DecodeLexer(lexer Lexer) (*Graph, error) {
	var d decoder
	return d.decode(lexer)
}

146
gg/decoder_test.go Normal file
View File

@ -0,0 +1,146 @@
package gg
import (
"strconv"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDecoder runs a table of gg source strings through the Lexer and
// DecodeLexer, and checks that each decodes without error into a Graph which
// is Equal to the expected one.
func TestDecoder(t *testing.T) {

	// i wraps a number in a Value.
	i := func(i int64) Value {
		return Value{Number: &i}
	}

	// n wraps a name in a Value.
	n := func(n string) Value {
		return Value{Name: &n}
	}

	tests := []struct {
		in string
		exp *Graph
	}{
		// empty input decodes to the empty graph.
		{
			in: "",
			exp: ZeroGraph,
		},
		// a single value with no edge value.
		{
			in: "out = 1;",
			exp: ZeroGraph.AddValueIn(ValueOut(i(1), Value{}), n("out")),
		},
		// a single value with an edge value chained in front of it.
		{
			in: "out = incr < 1;",
			exp: ZeroGraph.AddValueIn(ValueOut(i(1), n("incr")), n("out")),
		},
		// two chained edge values; the outer one results in a single-element
		// tuple.
		{
			in: "out = a < b < 1;",
			exp: ZeroGraph.AddValueIn(
				TupleOut(
					[]OpenEdge{ValueOut(i(1), n("b"))},
					n("a"),
				),
				n("out"),
			),
		},
		// a tuple at the end of a chain, whose elements are themselves chains.
		{
			in: "out = a < b < (1; c < 2; d < e < 3;);",
			exp: ZeroGraph.AddValueIn(
				TupleOut(
					[]OpenEdge{TupleOut(
						[]OpenEdge{
							ValueOut(i(1), Value{}),
							ValueOut(i(2), n("c")),
							TupleOut(
								[]OpenEdge{ValueOut(i(3), n("e"))},
								n("d"),
							),
						},
						n("b"),
					)},
					n("a"),
				),
				n("out"),
			),
		},
		// a tuple nested within an element of another tuple.
		{
			in: "out = a < b < (1; c < (d < 2; 3;); );",
			exp: ZeroGraph.AddValueIn(
				TupleOut(
					[]OpenEdge{TupleOut(
						[]OpenEdge{
							ValueOut(i(1), Value{}),
							TupleOut(
								[]OpenEdge{
									ValueOut(i(2), n("d")),
									ValueOut(i(3), Value{}),
								},
								n("c"),
							),
						},
						n("b"),
					)},
					n("a"),
				),
				n("out"),
			),
		},
		// a graph used as a value.
		{
			in: "out = { a = 1; b = c < d < 2; };",
			exp: ZeroGraph.AddValueIn(
				ValueOut(
					Value{Graph: ZeroGraph.
						AddValueIn(ValueOut(i(1), Value{}), n("a")).
						AddValueIn(
							TupleOut(
								[]OpenEdge{
									ValueOut(i(2), n("d")),
								},
								n("c"),
							),
							n("b"),
						),
					},
					Value{},
				),
				n("out"),
			),
		},
		// a graph used as an edge value in the middle of a chain.
		{
			in: "out = a < { b = 1; } < 2;",
			exp: ZeroGraph.AddValueIn(
				TupleOut(
					[]OpenEdge{
						ValueOut(
							i(2),
							Value{Graph: ZeroGraph.
								AddValueIn(ValueOut(i(1), Value{}), n("b")),
							},
						),
					},
					n("a"),
				),
				n("out"),
			),
		},
		// multiple declarations at the top level.
		{
			in: "a = 1; b = 2;",
			exp: ZeroGraph.
				AddValueIn(ValueOut(i(1), Value{}), n("a")).
				AddValueIn(ValueOut(i(2), Value{}), n("b")),
		},
	}

	// each case is run as a subtest which is named for its index in the table.
	for i, test := range tests {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			r := &mockReader{body: []byte(test.in)}
			lexer := NewLexer(r)

			got, err := DecodeLexer(lexer)
			assert.NoError(t, err)
			assert.True(t, Equal(got, test.exp), "\nexp:%v\ngot:%v", test.exp, got)
		})
	}
}

View File

@ -1,6 +1,11 @@
// Package gg implements ginger graph creation, traversal, and (de)serialization
package gg
import (
"fmt"
"strings"
)
// Value represents a value being stored in a Graph. No more than one field may
// be non-nil. No fields being set indicates lack of value.
type Value struct {
@ -33,19 +38,40 @@ func (v Value) Equal(v2 Value) bool {
}
}
// String returns a human-readable representation of the Value. A Value with
// no fields set is rendered as "<noval>", a name as the name itself, a number
// using its default formatting, and a graph using the Graph's String method.
func (v Value) String() string {
	if v == (Value{}) {
		return "<noval>"
	}

	if v.Name != nil {
		return *v.Name
	}

	if v.Number != nil {
		return fmt.Sprint(*v.Number)
	}

	if v.Graph != nil {
		return v.Graph.String()
	}

	panic("unknown value kind")
}
// VertexType enumerates the different possible vertex types.
type VertexType string
const (
// ValueVertex is a Vertex which contains exactly one value and has at least
// one edge (either input or output).
ValueVertex VertexType = "value"
ValueVertex VertexType = "val"
// TupleVertex is a Vertex which contains two or more in edges and
// exactly one out edge
//
// TODO ^ what about 0 or 1 in edges?
TupleVertex VertexType = "tuple"
TupleVertex VertexType = "tup"
)
////////////////////////////////////////////////////////////////////////////////
@ -64,6 +90,10 @@ func (oe OpenEdge) WithEdgeVal(val Value) OpenEdge {
return oe
}
// String returns a human-readable representation of the OpenEdge, in the form
// "<vertex type>(<from vertex>, <edge value>)".
func (oe OpenEdge) String() string {
	fromStr, valStr := oe.fromV.String(), oe.val.String()
	return fmt.Sprintf("%s(%s, %s)", oe.fromV.VertexType, fromStr, valStr)
}
// ValueOut creates a OpenEdge which, when used to construct a Graph, represents
// an edge (with edgeVal attached to it) coming from the ValueVertex containing
// val.
@ -79,10 +109,18 @@ func ValueOut(val, edgeVal Value) OpenEdge {
// returned as-is.
func TupleOut(ins []OpenEdge, edgeVal Value) OpenEdge {
if len(ins) == 1 && edgeVal == (Value{}) {
if len(ins) == 1 {
if edgeVal == (Value{}) {
return ins[0]
}
if ins[0].val == (Value{}) {
return ins[0].WithEdgeVal(edgeVal)
}
}
return OpenEdge{
fromV: mkVertex(TupleVertex, Value{}, ins...),
val: edgeVal,
@ -130,6 +168,29 @@ func (v vertex) equal(v2 vertex) bool {
return true
}
// String returns a human-readable representation of the vertex. A ValueVertex
// is rendered as its value, a TupleVertex as a bracketed, comma-separated list
// of the edges in the tuple. String panics on any other vertex type.
func (v vertex) String() string {
	if v.VertexType == ValueVertex {
		return v.val.String()
	}

	if v.VertexType != TupleVertex {
		panic("unknown vertix kind")
	}

	parts := make([]string, 0, len(v.tup))
	for _, oe := range v.tup {
		parts = append(parts, oe.String())
	}

	return fmt.Sprintf("[%s]", strings.Join(parts, ", "))
}
type graphValueIn struct {
val Value
edges []OpenEdge
@ -190,8 +251,21 @@ func (g *Graph) cp() *Graph {
return cp
}
////////////////////////////////////////////////////////////////////////////////
// Graph creation
// String returns a human-readable representation of the Graph, in the form
// "graph(valIn(<edge>, <value>), ...)". There is one valIn entry for every
// edge going into every value in the Graph.
func (g *Graph) String() string {
	var parts []string

	for _, in := range g.valIns {
		for _, edge := range in.edges {
			entry := fmt.Sprintf("valIn(%s, %s)", edge.String(), in.val.String())
			parts = append(parts, entry)
		}
	}

	return "graph(" + strings.Join(parts, ", ") + ")"
}
func (g *Graph) valIn(val Value) graphValueIn {
for _, valIn := range g.valIns {

View File

@ -57,7 +57,7 @@ func TestEqual(t *testing.T) {
exp: false,
},
{
// tuple with a single input edge that has no edgeVal should be
// tuple with no edge value and just a single input edge should be
// equivalent to just that edge.
a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
ValueOut(i(1), n("ident")),
@ -65,6 +65,16 @@ func TestEqual(t *testing.T) {
b: ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
exp: true,
},
{
// tuple with an edge value and just a single input edge that has no
// edgeVal should be equivalent to just that edge with the tuple's
// edge value.
a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
ValueOut(i(1), Value{}),
}, n("ident")), n("out")),
b: ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
exp: true,
},
{
a: ZeroGraph.
AddValueIn(ValueOut(n("in"), n("incr")), n("out")).

View File

@ -16,7 +16,7 @@ type LexerError struct {
}
func (e *LexerError) Error() string {
return fmt.Sprintf("%d: %d: %s", e.Col, e.Row, e.Err.Error())
return fmt.Sprintf("%d:%d: %s", e.Row, e.Col, e.Err.Error())
}
func (e *LexerError) Unwrap() error {

View File

@ -9,23 +9,6 @@ import (
"github.com/stretchr/testify/assert"
)
// mockReader is a reader which serves the contents of body. The read which
// drains body, and every read after it, returns err (which may be nil).
type mockReader struct {
	body []byte
	err  error
}

// Read copies as much of the remaining body as will fit into b. Once nothing
// is left of the body the configured error is returned alongside the count.
func (r *mockReader) Read(b []byte) (int, error) {
	n := copy(b, r.body)
	r.body = r.body[n:]

	if len(r.body) > 0 {
		return n, nil
	}

	return n, r.err
}
func TestLexer(t *testing.T) {
expErr := errors.New("eof")

23
gg/util_test.go Normal file
View File

@ -0,0 +1,23 @@
package gg
import "io"
// mockReader is an io.Reader which serves the contents of body. The read
// which drains body, and every read after it, returns err, or io.EOF if err
// is not set.
type mockReader struct {
	body []byte
	err  error
}

// Read copies as much of the remaining body as will fit into b. A nil error
// is returned only while there is still some of the body left to be read.
func (r *mockReader) Read(b []byte) (int, error) {
	n := copy(b, r.body)
	r.body = r.body[n:]

	switch {
	case len(r.body) > 0:
		return n, nil
	case r.err != nil:
		return n, r.err
	default:
		return n, io.EOF
	}
}