Implement Decoder

The decoder basically works, though there are some quirks in the design I'll need to marinate on. For example, you can't have a tuple as an edge value. This is probably fine? Stringification of Graphs was added to aid in debugging the decoder; the format it outputs is not the final one. Most likely the (future) encoder will be used for that purpose. The decoder is not implemented in the nicest way; it fully reads in the LexerTokens first, and then processes them. This made trying to wrap my head around the problem a lot easier because it left fewer failure cases, but it's not the most efficient thing to do. Now that v0 is done it's pretty plain to see that the decoder could work by only reading in the next N tokens that it needs at a time. But that will be left for a future version.
2 years ago · 33e59a3836
parent 82e74cb55f
commit 33e59a3836
7 changed files with 579 additions and 25 deletions
--- a/gg/decoder.go
+++ b/gg/decoder.go
@ -0,0 +1,318 @@
+package gg
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+)
+
+// Punctuations which are used in the gg file format.
+const (
+	punctTerm       = ";"
+	punctOp         = "<"
+	punctAssign     = "="
+	punctOpenGraph  = "{"
+	punctCloseGraph = "}"
+	punctOpenTuple  = "("
+	punctCloseTuple = ")"
+)
+
+func decoderErr(tok LexerToken, err error) error {
+	return fmt.Errorf("%d:%d: %w", tok.Row, tok.Col, err)
+}
+
+func decoderErrf(tok LexerToken, str string, args ...interface{}) error {
+	return decoderErr(tok, fmt.Errorf(str, args...))
+}
+
+func isPunct(tok LexerToken, val string) bool {
+	return tok.Kind == LexerTokenKindPunctuation && tok.Value == val
+}
+
+func isTerm(tok LexerToken) bool {
+	return isPunct(tok, punctTerm)
+}
+
+// decoder is currently only really used to namespace functions related to
+// decoding Graphs. It may later have actual fields added to it, such as for
+// options passed by the caller.
+type decoder struct{}
+
+// The returned boolean indicates whether the token following the single value
+// token is a term. If a term followed the value token then it is not included
+// in the returned leftover tokens.
+//
+// If termed is false then the leftover tokens cannot be empty.
+func (d *decoder) parseSingleValue(
+	toks []LexerToken,
+) (
+	Value, []LexerToken, bool, error,
+) {
+
+	tok, rest := toks[0], toks[1:]
+
+	if len(rest) == 0 {
+		return Value{}, nil, false, decoderErrf(tok, "cannot be final token, possibly missing %q", punctTerm)
+	}
+
+	termed := isTerm(rest[0])
+
+	if termed {
+		rest = rest[1:]
+	}
+
+	switch tok.Kind {
+
+	case LexerTokenKindName:
+		return Value{Name: &tok.Value}, rest, termed, nil
+
+	case LexerTokenKindNumber:
+
+		i, err := strconv.ParseInt(tok.Value, 10, 64)
+
+		if err != nil {
+			return Value{}, nil, false, decoderErrf(tok, "parsing %q as integer: %w", tok.Value, err)
+		}
+
+		return Value{Number: &i}, rest, termed, nil
+
+	case LexerTokenKindPunctuation:
+		return Value{}, nil, false, decoderErrf(tok, "expected value, found punctuation %q", tok.Value)
+
+	default:
+		panic(fmt.Sprintf("unexpected token kind %q", tok.Kind))
+	}
+}
+
+func (d *decoder) parseOpenEdge(
+	toks []LexerToken,
+) (
+	OpenEdge, []LexerToken, error,
+) {
+
+	if isPunct(toks[0], punctOpenTuple) {
+		return d.parseTuple(toks)
+	}
+
+	var (
+		val    Value
+		termed bool
+		err    error
+	)
+
+	switch {
+
+	case isPunct(toks[0], punctOpenGraph):
+		val, toks, termed, err = d.parseGraphValue(toks, true)
+
+	default:
+		val, toks, termed, err = d.parseSingleValue(toks)
+	}
+
+	if err != nil {
+		return OpenEdge{}, nil, err
+
+	}
+
+	if termed {
+		return ValueOut(val, Value{}), toks, nil
+	}
+
+	opTok, toks := toks[0], toks[1:]
+
+	if !isPunct(opTok, punctOp) {
+		return OpenEdge{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
+	}
+
+	if len(toks) == 0 {
+		return OpenEdge{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
+	}
+
+	oe, toks, err := d.parseOpenEdge(toks)
+
+	if err != nil {
+		return OpenEdge{}, nil, err
+	}
+
+	oe = TupleOut([]OpenEdge{oe}, val)
+
+	return oe, toks, nil
+}
+
+func (d *decoder) parseTuple(
+	toks []LexerToken,
+) (
+	OpenEdge, []LexerToken, error,
+) {
+
+	openTok, toks := toks[0], toks[1:]
+
+	var edges []OpenEdge
+
+	for {
+
+		if len(toks) == 0 {
+			return OpenEdge{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
+
+		} else if isPunct(toks[0], punctCloseTuple) {
+			toks = toks[1:]
+			break
+		}
+
+		var (
+			oe  OpenEdge
+			err error
+		)
+
+		oe, toks, err = d.parseOpenEdge(toks)
+
+		if err != nil {
+			return OpenEdge{}, nil, err
+		}
+
+		edges = append(edges, oe)
+	}
+
+	// this is a quirk of the syntax, _technically_ a tuple doesn't need a
+	// term after it, since it can't be used as an edge value, and so
+	// nothing can come after it in the chain.
+	if len(toks) > 0 && isTerm(toks[0]) {
+		toks = toks[1:]
+	}
+
+	return TupleOut(edges, Value{}), toks, nil
+}
+
+// The returned boolean indicates whether the token following the graph is a
+// term. If a term followed the graph then it is not included in the returned
+// leftover tokens.
+//
+// If termed is false then the leftover tokens cannot be empty.
+func (d *decoder) parseGraphValue(
+	toks []LexerToken, expectWrappers bool,
+) (
+	Value, []LexerToken, bool, error,
+) {
+
+	var openTok LexerToken
+
+	if expectWrappers {
+		openTok, toks = toks[0], toks[1:]
+	}
+
+	g := ZeroGraph
+
+	for {
+
+		if len(toks) == 0 {
+
+			if !expectWrappers {
+				break
+			}
+
+			return Value{}, nil, false, decoderErrf(openTok, "no matching %q", punctCloseGraph)
+
+		} else if closingTok := toks[0]; isPunct(closingTok, punctCloseGraph) {
+
+			if !expectWrappers {
+				return Value{}, nil, false, decoderErrf(closingTok, "unexpected %q", punctCloseGraph)
+			}
+
+			toks = toks[1:]
+
+			if len(toks) == 0 {
+				return Value{}, nil, false, decoderErrf(closingTok, "cannot be final token, possibly missing %q", punctTerm)
+			}
+
+			break
+		}
+
+		var err error
+
+		if g, toks, err = d.parseValIn(g, toks); err != nil {
+			return Value{}, nil, false, err
+		}
+	}
+
+	val := Value{Graph: g}
+
+	if !expectWrappers {
+		return val, toks, true, nil
+	}
+
+	termed := isTerm(toks[0])
+
+	if termed {
+		toks = toks[1:]
+	}
+
+	return val, toks, termed, nil
+}
+
+func (d *decoder) parseValIn(into *Graph, toks []LexerToken) (*Graph, []LexerToken, error) {
+
+	if len(toks) == 0 {
+		return into, nil, nil
+
+	} else if len(toks) < 3 {
+		return nil, nil, decoderErrf(toks[0], `must be of the form "<name> = ..."`)
+	}
+
+	dst := toks[0]
+	eq := toks[1]
+	toks = toks[2:]
+
+	if dst.Kind != LexerTokenKindName {
+		return nil, nil, decoderErrf(dst, "must be a name")
+
+	} else if !isPunct(eq, punctAssign) {
+		return nil, nil, decoderErrf(eq, "must be %q", punctAssign)
+	}
+
+	oe, toks, err := d.parseOpenEdge(toks)
+
+	if err != nil {
+		return nil, nil, err
+	}
+
+	dstVal := Value{Name: &dst.Value}
+
+	return into.AddValueIn(oe, dstVal), toks, nil
+}
+
+func (d *decoder) decode(lexer Lexer) (*Graph, error) {
+
+	var toks []LexerToken
+
+	for {
+
+		tok, err := lexer.Next()
+
+		if errors.Is(err, io.EOF) {
+			break
+
+		} else if err != nil {
+			return nil, fmt.Errorf("reading next token: %w", err)
+		}
+
+		toks = append(toks, tok)
+	}
+
+	val, _, _, err := d.parseGraphValue(toks, false)
+
+	if err != nil {
+		return nil, err
+	}
+
+	return val.Graph, nil
+}
+
+// DecodeLexer reads lexical tokens from the given Lexer and uses them to
+// construct a Graph according to the rules of the gg file format. DecodeLexer
+// will only return an error if there is a non-EOF error returned from the Lexer,
+// or the tokens read cannot be used to construct a valid Graph.
+func DecodeLexer(lexer Lexer) (*Graph, error) {
+	decoder := &decoder{}
+	return decoder.decode(lexer)
+}
--- a/gg/decoder_test.go
+++ b/gg/decoder_test.go
@ -0,0 +1,146 @@
+package gg
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDecoder(t *testing.T) {
+
+	i := func(i int64) Value {
+		return Value{Number: &i}
+	}
+
+	n := func(n string) Value {
+		return Value{Name: &n}
+	}
+
+	tests := []struct {
+		in  string
+		exp *Graph
+	}{
+		{
+			in:  "",
+			exp: ZeroGraph,
+		},
+		{
+			in:  "out = 1;",
+			exp: ZeroGraph.AddValueIn(ValueOut(i(1), Value{}), n("out")),
+		},
+		{
+			in:  "out = incr < 1;",
+			exp: ZeroGraph.AddValueIn(ValueOut(i(1), n("incr")), n("out")),
+		},
+		{
+			in: "out = a < b < 1;",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{ValueOut(i(1), n("b"))},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < b < (1; c < 2; d < e < 3;);",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{TupleOut(
+						[]OpenEdge{
+							ValueOut(i(1), Value{}),
+							ValueOut(i(2), n("c")),
+							TupleOut(
+								[]OpenEdge{ValueOut(i(3), n("e"))},
+								n("d"),
+							),
+						},
+						n("b"),
+					)},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < b < (1; c < (d < 2; 3;); );",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{TupleOut(
+						[]OpenEdge{
+							ValueOut(i(1), Value{}),
+							TupleOut(
+								[]OpenEdge{
+									ValueOut(i(2), n("d")),
+									ValueOut(i(3), Value{}),
+								},
+								n("c"),
+							),
+						},
+						n("b"),
+					)},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = { a = 1; b = c < d < 2; };",
+			exp: ZeroGraph.AddValueIn(
+				ValueOut(
+					Value{Graph: ZeroGraph.
+						AddValueIn(ValueOut(i(1), Value{}), n("a")).
+						AddValueIn(
+							TupleOut(
+								[]OpenEdge{
+									ValueOut(i(2), n("d")),
+								},
+								n("c"),
+							),
+							n("b"),
+						),
+					},
+					Value{},
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < { b = 1; } < 2;",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{
+						ValueOut(
+							i(2),
+							Value{Graph: ZeroGraph.
+								AddValueIn(ValueOut(i(1), Value{}), n("b")),
+							},
+						),
+					},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "a = 1; b = 2;",
+			exp: ZeroGraph.
+				AddValueIn(ValueOut(i(1), Value{}), n("a")).
+				AddValueIn(ValueOut(i(2), Value{}), n("b")),
+		},
+	}
+
+	for i, test := range tests {
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+
+			r := &mockReader{body: []byte(test.in)}
+			lexer := NewLexer(r)
+
+			got, err := DecodeLexer(lexer)
+			assert.NoError(t, err)
+			assert.True(t, Equal(got, test.exp), "\nexp:%v\ngot:%v", test.exp, got)
+
+		})
+	}
+}
--- a/gg/gg.go
+++ b/gg/gg.go
@ -1,6 +1,11 @@
 // Package gg implements ginger graph creation, traversal, and (de)serialization
 package gg

+import (
+	"fmt"
+	"strings"
+)
+
 // Value represents a value being stored in a Graph. No more than one field may
 // be non-nil. No fields being set indicates lack of value.
 type Value struct {
@ -33,19 +38,40 @@ func (v Value) Equal(v2 Value) bool {
 	}
 }

+func (v Value) String() string {
+
+	switch {
+
+	case v == Value{}:
+		return "<noval>"
+
+	case v.Name != nil:
+		return *v.Name
+
+	case v.Number != nil:
+		return fmt.Sprint(*v.Number)
+
+	case v.Graph != nil:
+		return v.Graph.String()
+
+	default:
+		panic("unknown value kind")
+	}
+}
+
 // VertexType enumerates the different possible vertex types.
 type VertexType string

 const (
 	// ValueVertex is a Vertex which contains exactly one value and has at least
 	// one edge (either input or output).
-	ValueVertex VertexType = "value"
+	ValueVertex VertexType = "val"

 	// TupleVertex is a Vertex which contains two or more in edges and
 	// exactly one out edge
 	//
 	// TODO ^ what about 0 or 1 in edges?
-	TupleVertex VertexType = "tuple"
+	TupleVertex VertexType = "tup"
 )

 ////////////////////////////////////////////////////////////////////////////////
@ -64,6 +90,10 @@ func (oe OpenEdge) WithEdgeVal(val Value) OpenEdge {
 	return oe
 }

+func (oe OpenEdge) String() string {
+	return fmt.Sprintf("%s(%s, %s)", oe.fromV.VertexType, oe.fromV.String(), oe.val.String())
+}
+
 // ValueOut creates a OpenEdge which, when used to construct a Graph, represents
 // an edge (with edgeVal attached to it) coming from the ValueVertex containing
 // val.
@ -79,8 +109,16 @@ func ValueOut(val, edgeVal Value) OpenEdge {
 // returned as-is.
 func TupleOut(ins []OpenEdge, edgeVal Value) OpenEdge {

-	if len(ins) == 1 && edgeVal == (Value{}) {
-		return ins[0]
+	if len(ins) == 1 {
+
+		if edgeVal == (Value{}) {
+			return ins[0]
+		}
+
+		if ins[0].val == (Value{}) {
+			return ins[0].WithEdgeVal(edgeVal)
+		}
+
 	}

 	return OpenEdge{
@ -130,6 +168,29 @@ func (v vertex) equal(v2 vertex) bool {
 	return true
 }

+func (v vertex) String() string {
+
+	switch v.VertexType {
+
+	case ValueVertex:
+		return v.val.String()
+
+	case TupleVertex:
+
+		strs := make([]string, len(v.tup))
+
+		for i := range v.tup {
+			strs[i] = v.tup[i].String()
+		}
+
+		return fmt.Sprintf("[%s]", strings.Join(strs, ", "))
+
+	default:
+		panic("unknown vertix kind")
+	}
+
+}
+
 type graphValueIn struct {
 	val   Value
 	edges []OpenEdge
@ -190,8 +251,21 @@ func (g *Graph) cp() *Graph {
 	return cp
 }

-////////////////////////////////////////////////////////////////////////////////
-// Graph creation
+func (g *Graph) String() string {
+
+	var strs []string
+
+	for _, valIn := range g.valIns {
+		for _, oe := range valIn.edges {
+			strs = append(
+				strs,
+				fmt.Sprintf("valIn(%s, %s)", oe.String(), valIn.val.String()),
+			)
+		}
+	}
+
+	return fmt.Sprintf("graph(%s)", strings.Join(strs, ", "))
+}

 func (g *Graph) valIn(val Value) graphValueIn {
 	for _, valIn := range g.valIns {
--- a/gg/gg_test.go
+++ b/gg/gg_test.go
@ -57,7 +57,7 @@ func TestEqual(t *testing.T) {
 			exp: false,
 		},
 		{
-			// tuple with a single input edge that has no edgeVal should be
+			// tuple with no edge value and just a single input edge should be
 			// equivalent to just that edge.
 			a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
 				ValueOut(i(1), n("ident")),
@ -65,6 +65,16 @@ func TestEqual(t *testing.T) {
 			b:   ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
 			exp: true,
 		},
+		{
+			// tuple with an edge value and just a single input edge that has no
+			// edgeVal should be equivalent to just that edge with the tuple's
+			// edge value.
+			a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
+				ValueOut(i(1), Value{}),
+			}, n("ident")), n("out")),
+			b:   ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
+			exp: true,
+		},
 		{
 			a: ZeroGraph.
 				AddValueIn(ValueOut(n("in"), n("incr")), n("out")).
--- a/gg/lexer.go
+++ b/gg/lexer.go
@ -16,7 +16,7 @@ type LexerError struct {
 }

 func (e *LexerError) Error() string {
-	return fmt.Sprintf("%d: %d: %s", e.Col, e.Row, e.Err.Error())
+	return fmt.Sprintf("%d:%d: %s", e.Row, e.Col, e.Err.Error())
 }

 func (e *LexerError) Unwrap() error {
--- a/gg/lexer_test.go
+++ b/gg/lexer_test.go
@ -9,23 +9,6 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-type mockReader struct {
-	body []byte
-	err  error
-}
-
-func (r *mockReader) Read(b []byte) (int, error) {
-
-	n := copy(b, r.body)
-	r.body = r.body[n:]
-
-	if len(r.body) == 0 {
-		return n, r.err
-	}
-
-	return n, nil
-}
-
 func TestLexer(t *testing.T) {

 	expErr := errors.New("eof")
--- a/gg/util_test.go
+++ b/gg/util_test.go
@ -0,0 +1,23 @@
+package gg
+
+import "io"
+
+type mockReader struct {
+	body []byte
+	err  error
+}
+
+func (r *mockReader) Read(b []byte) (int, error) {
+
+	n := copy(b, r.body)
+	r.body = r.body[n:]
+
+	if len(r.body) == 0 {
+		if r.err == nil {
+			return n, io.EOF
+		}
+		return n, r.err
+	}
+
+	return n, nil
+}