From 33e59a38366e3482c611432d610f3aacd9611533 Mon Sep 17 00:00:00 2001
From: Brian Picciano <mediocregopher@gmail.com>
Date: Sun, 26 Dec 2021 16:23:41 -0700
Subject: [PATCH] Implement Decoder

The decoder basically works, though there are some quirks in the design
that I'll need to marinate on. For example, you can't have a tuple as an
edge value. This is probably fine?

Stringification of Graphs was added to aid in debugging the decoder; the
format it outputs is not the final one. Most likely the (future) encoder
will be used for that purpose.

The decoder is not implemented in the nicest way; it fully reads in the
LexerTokens first, and then processes. This made trying to wrap my head
around the problem a lot easier because it left fewer failure cases, but
it's not the most efficient thing to do.

Now that v0 is done it's pretty plain to see that the decoder could work
by only reading in the next N tokens that it needs at a time. But that
will be left for a future version.
---
 gg/decoder.go      | 318 +++++++++++++++++++++++++++++++++++++++++++++
 gg/decoder_test.go | 146 +++++++++++++++++++++
 gg/gg.go           |  86 +++++++++++-
 gg/gg_test.go      |  12 +-
 gg/lexer.go        |   2 +-
 gg/lexer_test.go   |  17 ---
 gg/util_test.go    |  23 ++++
 7 files changed, 579 insertions(+), 25 deletions(-)
 create mode 100644 gg/decoder.go
 create mode 100644 gg/decoder_test.go
 create mode 100644 gg/util_test.go

diff --git a/gg/decoder.go b/gg/decoder.go
new file mode 100644
index 0000000..4e21ceb
--- /dev/null
+++ b/gg/decoder.go
@@ -0,0 +1,318 @@
+package gg
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+)
+
+// Punctuation tokens which carry meaning in the gg file format.
+const (
+	punctTerm       = ";" // terminates a value or edge declaration
+	punctOp         = "<" // joins an edge value to the edge it sits on
+	punctAssign     = "=" // joins a destination name to its incoming edge
+	punctOpenGraph  = "{" // opens a graph value
+	punctCloseGraph = "}" // closes a graph value
+	punctOpenTuple  = "(" // opens a tuple
+	punctCloseTuple = ")" // closes a tuple
+)
+
+func decoderErr(tok LexerToken, err error) error {
+	return fmt.Errorf("%d:%d: %w", tok.Row, tok.Col, err) // prefix err with tok's row:col, wrapping it via %w
+}
+
+func decoderErrf(tok LexerToken, str string, args ...interface{}) error {
+	return decoderErr(tok, fmt.Errorf(str, args...)) // like decoderErr, but the error is built from a format string
+}
+
+func isPunct(tok LexerToken, val string) bool {
+	return tok.Kind == LexerTokenKindPunctuation && tok.Value == val // punctuation whose text is exactly val
+}
+
+func isTerm(tok LexerToken) bool {
+	return isPunct(tok, punctTerm) // a term is the punctTerm punctuation token
+}
+
+// decoder groups the parsing functions which turn LexerTokens into a Graph. It
+// has no fields for now and so carries no state between calls; fields may be
+// added later, such as for options passed by the caller.
+type decoder struct{}
+
+// parseSingleValue decodes the first token of toks, which must not be empty,
+// into a name or number Value; punctuation there is an error. The returned
+// boolean (termed) indicates if the token following that first token is a
+// term. If so, the term is consumed and is not included in the returned
+// leftover tokens. The first token being the last one in toks is an error.
+//
+// if termed is false then leftover tokens cannot be empty.
+func (d *decoder) parseSingleValue(
+	toks []LexerToken,
+) (
+	Value, []LexerToken, bool, error,
+) {
+
+	tok, rest := toks[0], toks[1:]
+
+	if len(rest) == 0 {
+		return Value{}, nil, false, decoderErrf(tok, "cannot be final token, possibly missing %q", punctTerm)
+	}
+
+	termed := isTerm(rest[0])
+
+	if termed {
+		rest = rest[1:]
+	}
+
+	switch tok.Kind {
+	case LexerTokenKindName:
+		return Value{Name: &tok.Value}, rest, termed, nil
+
+	case LexerTokenKindNumber:
+		i, err := strconv.ParseInt(tok.Value, 10, 64)
+
+		if err != nil {
+			return Value{}, nil, false, decoderErrf(tok, "parsing %q as integer: %w", tok.Value, err)
+		}
+
+		return Value{Number: &i}, rest, termed, nil
+
+	case LexerTokenKindPunctuation:
+		return Value{}, nil, false, decoderErrf(tok, "expected value, found punctuation %q", tok.Value)
+
+	default:
+		panic(fmt.Sprintf("unexpected token kind %q", tok.Kind))
+	}
+}
+
+// parseOpenEdge decodes one OpenEdge from the front of toks, which must not be
+// empty. A leading "(" is handed to parseTuple. Otherwise a value is decoded:
+// if termed it is the edge's source, if not it must be followed by "<" and
+// becomes the edge value of a tuple wrapping the edge decoded after the "<".
+func (d *decoder) parseOpenEdge(
+	toks []LexerToken,
+) (
+	OpenEdge, []LexerToken, error,
+) {
+	if isPunct(toks[0], punctOpenTuple) {
+		return d.parseTuple(toks)
+	}
+
+	var (
+		val    Value
+		termed bool
+		err    error
+	)
+
+	switch {
+	case isPunct(toks[0], punctOpenGraph):
+		val, toks, termed, err = d.parseGraphValue(toks, true)
+
+	default:
+		val, toks, termed, err = d.parseSingleValue(toks)
+	}
+
+	if err != nil {
+		return OpenEdge{}, nil, err
+	}
+
+	if termed {
+		return ValueOut(val, Value{}), toks, nil
+	}
+
+	opTok, toks := toks[0], toks[1:]
+
+	if !isPunct(opTok, punctOp) {
+		return OpenEdge{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
+	}
+
+	if len(toks) == 0 {
+		return OpenEdge{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
+	}
+
+	oe, toks, err := d.parseOpenEdge(toks)
+	if err != nil {
+		return OpenEdge{}, nil, err
+	}
+
+	oe = TupleOut([]OpenEdge{oe}, val)
+
+	return oe, toks, nil
+}
+
+// parseTuple decodes a tuple. The first token of toks is consumed as the
+// opening "(", then open edges are decoded until the matching ")" is reached.
+// The result is a tuple of those edges, in order, with no edge value.
+func (d *decoder) parseTuple(
+	toks []LexerToken,
+) (
+	OpenEdge, []LexerToken, error,
+) {
+	openTok, toks := toks[0], toks[1:]
+
+	var edges []OpenEdge
+
+	for {
+		if len(toks) == 0 {
+			return OpenEdge{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
+
+		} else if isPunct(toks[0], punctCloseTuple) {
+			toks = toks[1:]
+			break
+		}
+
+		var (
+			oe  OpenEdge
+			err error
+		)
+
+		oe, toks, err = d.parseOpenEdge(toks)
+		if err != nil {
+			return OpenEdge{}, nil, err
+		}
+
+		edges = append(edges, oe)
+	}
+
+	// this is a quirk of the syntax, _technically_ a tuple doesn't need a
+	// term after it, since it can't be used as an edge value, and so
+	// nothing can come after it in the chain.
+	if len(toks) > 0 && isTerm(toks[0]) {
+		toks = toks[1:]
+	}
+
+	return TupleOut(edges, Value{}), toks, nil
+}
+
+// parseGraphValue decodes "<name> = ..." declarations into a graph Value. If
+// expectWrappers is true the first token of toks is consumed as the opening
+// "{" and decoding stops at the matching "}", which may not be the final
+// token. Otherwise decoding runs to the end of toks and any "}" is an error.
+//
+// The returned boolean indicates if the token following the graph is a term,
+// in which case that term is not included in the returned leftover tokens. It
+// is always true when expectWrappers is false.
+//
+// if termed is false then leftover tokens cannot be empty.
+func (d *decoder) parseGraphValue(
+	toks []LexerToken, expectWrappers bool,
+) (
+	Value, []LexerToken, bool, error,
+) {
+	var openTok LexerToken
+	if expectWrappers {
+		openTok, toks = toks[0], toks[1:]
+	}
+
+	g := ZeroGraph
+
+	for {
+		if len(toks) == 0 {
+			if !expectWrappers {
+				break
+			}
+
+			return Value{}, nil, false, decoderErrf(openTok, "no matching %q", punctCloseGraph)
+
+		} else if closingTok := toks[0]; isPunct(closingTok, punctCloseGraph) {
+			if !expectWrappers {
+				return Value{}, nil, false, decoderErrf(closingTok, "unexpected %q", punctCloseGraph)
+			}
+
+			toks = toks[1:]
+
+			if len(toks) == 0 {
+				return Value{}, nil, false, decoderErrf(closingTok, "cannot be final token, possibly missing %q", punctTerm)
+			}
+
+			break
+		}
+
+		var err error
+
+		if g, toks, err = d.parseValIn(g, toks); err != nil {
+			return Value{}, nil, false, err
+		}
+	}
+
+	val := Value{Graph: g}
+
+	if !expectWrappers {
+		return val, toks, true, nil
+	}
+
+	termed := isTerm(toks[0])
+
+	if termed {
+		toks = toks[1:]
+	}
+
+	return val, toks, termed, nil
+}
+
+// parseValIn decodes one "<name> = <open edge>" declaration from the front of
+// toks and returns the result of adding that edge into the given Graph as an
+// input of <name>. If toks is empty then into is returned as-is.
+func (d *decoder) parseValIn(into *Graph, toks []LexerToken) (*Graph, []LexerToken, error) {
+	if len(toks) == 0 {
+		return into, nil, nil
+
+	} else if len(toks) < 3 {
+		return nil, nil, decoderErrf(toks[0], `must be of the form "<name> = ..."`)
+	}
+
+	dst := toks[0]
+	eq := toks[1]
+	toks = toks[2:]
+
+	if dst.Kind != LexerTokenKindName {
+		return nil, nil, decoderErrf(dst, "must be a name")
+
+	} else if !isPunct(eq, punctAssign) {
+		return nil, nil, decoderErrf(eq, "must be %q", punctAssign)
+	}
+
+	oe, toks, err := d.parseOpenEdge(toks)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	dstVal := Value{Name: &dst.Value}
+	return into.AddValueIn(oe, dstVal), toks, nil
+}
+
+// decode reads every token from lexer, until it returns io.EOF, into memory
+// and then parses those tokens as the contents of a top-level graph, ie one
+// which is not wrapped in "{" and "}".
+func (d *decoder) decode(lexer Lexer) (*Graph, error) {
+	var toks []LexerToken
+
+	for {
+		tok, err := lexer.Next()
+
+		if errors.Is(err, io.EOF) {
+			break
+
+		} else if err != nil {
+			return nil, fmt.Errorf("reading next token: %w", err)
+		}
+
+		toks = append(toks, tok)
+	}
+
+	val, _, _, err := d.parseGraphValue(toks, false)
+	if err != nil {
+		return nil, err
+	}
+
+	return val.Graph, nil
+}
+
+// DecodeLexer reads lexical tokens from the given Lexer and uses them to
+// construct a Graph according to the rules of the gg file format. DecodeLexer
+// will only return an error if there is a non-EOF error returned from the
+// Lexer, or the tokens read cannot be used to construct a valid Graph.
+func DecodeLexer(lexer Lexer) (*Graph, error) {
+	decoder := &decoder{}
+	return decoder.decode(lexer)
+}
diff --git a/gg/decoder_test.go b/gg/decoder_test.go
new file mode 100644
index 0000000..9876436
--- /dev/null
+++ b/gg/decoder_test.go
@@ -0,0 +1,146 @@
+package gg
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestDecoder checks that gg source text decodes into the expected Graph.
+func TestDecoder(t *testing.T) {
+	i := func(i int64) Value { // wraps i as a number Value
+		return Value{Number: &i}
+	}
+
+	n := func(n string) Value { // wraps n as a name Value
+		return Value{Name: &n}
+	}
+
+	tests := []struct {
+		in  string
+		exp *Graph
+	}{
+		{
+			in:  "",
+			exp: ZeroGraph,
+		},
+		{
+			in:  "out = 1;",
+			exp: ZeroGraph.AddValueIn(ValueOut(i(1), Value{}), n("out")),
+		},
+		{
+			in:  "out = incr < 1;",
+			exp: ZeroGraph.AddValueIn(ValueOut(i(1), n("incr")), n("out")),
+		},
+		{
+			in: "out = a < b < 1;",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{ValueOut(i(1), n("b"))},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < b < (1; c < 2; d < e < 3;);",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{TupleOut(
+						[]OpenEdge{
+							ValueOut(i(1), Value{}),
+							ValueOut(i(2), n("c")),
+							TupleOut(
+								[]OpenEdge{ValueOut(i(3), n("e"))},
+								n("d"),
+							),
+						},
+						n("b"),
+					)},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < b < (1; c < (d < 2; 3;); );",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{TupleOut(
+						[]OpenEdge{
+							ValueOut(i(1), Value{}),
+							TupleOut(
+								[]OpenEdge{
+									ValueOut(i(2), n("d")),
+									ValueOut(i(3), Value{}),
+								},
+								n("c"),
+							),
+						},
+						n("b"),
+					)},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = { a = 1; b = c < d < 2; };",
+			exp: ZeroGraph.AddValueIn(
+				ValueOut(
+					Value{Graph: ZeroGraph.
+						AddValueIn(ValueOut(i(1), Value{}), n("a")).
+						AddValueIn(
+							TupleOut(
+								[]OpenEdge{
+									ValueOut(i(2), n("d")),
+								},
+								n("c"),
+							),
+							n("b"),
+						),
+					},
+					Value{},
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "out = a < { b = 1; } < 2;",
+			exp: ZeroGraph.AddValueIn(
+				TupleOut(
+					[]OpenEdge{
+						ValueOut(
+							i(2),
+							Value{Graph: ZeroGraph.
+								AddValueIn(ValueOut(i(1), Value{}), n("b")),
+							},
+						),
+					},
+					n("a"),
+				),
+				n("out"),
+			),
+		},
+		{
+			in: "a = 1; b = 2;",
+			exp: ZeroGraph.
+				AddValueIn(ValueOut(i(1), Value{}), n("a")).
+				AddValueIn(ValueOut(i(2), Value{}), n("b")),
+		},
+	}
+
+	for i, test := range tests {
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+			r := &mockReader{body: []byte(test.in)}
+			lexer := NewLexer(r)
+
+			// NOTE(review): assert.NoError does not halt the test, so Equal still
+			// runs on got after a decode error — confirm Equal tolerates that.
+			got, err := DecodeLexer(lexer)
+			assert.NoError(t, err)
+			assert.True(t, Equal(got, test.exp), "\nexp:%v\ngot:%v", test.exp, got)
+		})
+	}
+}
diff --git a/gg/gg.go b/gg/gg.go
index a006280..78620e7 100644
--- a/gg/gg.go
+++ b/gg/gg.go
@@ -1,6 +1,11 @@
 // Package gg implements ginger graph creation, traversal, and (de)serialization
 package gg
 
+import (
+	"fmt"
+	"strings"
+)
+
 // Value represents a value being stored in a Graph. No more than one field may
 // be non-nil. No fields being set indicates lack of value.
 type Value struct {
@@ -33,19 +38,40 @@ func (v Value) Equal(v2 Value) bool {
 	}
 }
 
+// String returns the Value's name, number, or graph in string form, or
+// "<noval>" if no field is set. It is intended as a debugging aid.
+func (v Value) String() string {
+	switch {
+	case v == Value{}:
+		return "<noval>"
+
+	case v.Name != nil:
+		return *v.Name
+
+	case v.Number != nil:
+		return fmt.Sprint(*v.Number)
+
+	case v.Graph != nil:
+		return v.Graph.String()
+
+	default:
+		panic("unknown value kind")
+	}
+}
+
 // VertexType enumerates the different possible vertex types.
 type VertexType string
 
 const (
 	// ValueVertex is a Vertex which contains exactly one value and has at least
 	// one edge (either input or output).
-	ValueVertex VertexType = "value"
+	ValueVertex VertexType = "val"
 
 	// TupleVertex is a Vertex which contains two or more in edges and
 	// exactly one out edge
 	//
 	// TODO ^ what about 0 or 1 in edges?
-	TupleVertex VertexType = "tuple"
+	TupleVertex VertexType = "tup"
 )
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -64,6 +90,10 @@ func (oe OpenEdge) WithEdgeVal(val Value) OpenEdge {
 	return oe
 }
 
+func (oe OpenEdge) String() string { // renders as "<vertex type>(<from vertex>, <edge value>)"
+	return fmt.Sprintf("%s(%s, %s)", oe.fromV.VertexType, oe.fromV.String(), oe.val.String())
+}
+
 // ValueOut creates a OpenEdge which, when used to construct a Graph, represents
 // an edge (with edgeVal attached to it) coming from the ValueVertex containing
 // val.
@@ -79,8 +109,16 @@ func ValueOut(val, edgeVal Value) OpenEdge {
 // returned as-is.
 func TupleOut(ins []OpenEdge, edgeVal Value) OpenEdge {
 
-	if len(ins) == 1 && edgeVal == (Value{}) {
-		return ins[0]
+	if len(ins) == 1 {
+
+		if edgeVal == (Value{}) {
+			return ins[0]
+		}
+
+		if ins[0].val == (Value{}) {
+			return ins[0].WithEdgeVal(edgeVal)
+		}
+
 	}
 
 	return OpenEdge{
@@ -130,6 +168,29 @@ func (v vertex) equal(v2 vertex) bool {
 	return true
 }
 
+// String returns the vertex in string form: the contained value for a
+// ValueVertex, or the bracketed, comma-separated elements of tup for a
+// TupleVertex. It panics on any other VertexType.
+func (v vertex) String() string {
+	switch v.VertexType {
+	case ValueVertex:
+		return v.val.String()
+
+	case TupleVertex:
+
+		strs := make([]string, len(v.tup))
+
+		for i := range v.tup {
+			strs[i] = v.tup[i].String()
+		}
+
+		return fmt.Sprintf("[%s]", strings.Join(strs, ", "))
+
+	default:
+		panic("unknown vertex kind")
+	}
+}
+
 type graphValueIn struct {
 	val   Value
 	edges []OpenEdge
@@ -190,8 +251,21 @@ func (g *Graph) cp() *Graph {
 	return cp
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// Graph creation
+// String returns the Graph in a debugging form, with one "valIn(edge, value)"
+// entry per edge of every value-in, wrapped in "graph(...)".
+func (g *Graph) String() string {
+	var strs []string
+	for _, valIn := range g.valIns {
+		for _, oe := range valIn.edges {
+			strs = append(
+				strs,
+				fmt.Sprintf("valIn(%s, %s)", oe.String(), valIn.val.String()),
+			)
+		}
+	}
+
+	return fmt.Sprintf("graph(%s)", strings.Join(strs, ", "))
+}
 
 func (g *Graph) valIn(val Value) graphValueIn {
 	for _, valIn := range g.valIns {
diff --git a/gg/gg_test.go b/gg/gg_test.go
index 96e6d5a..32d2d67 100644
--- a/gg/gg_test.go
+++ b/gg/gg_test.go
@@ -57,7 +57,7 @@ func TestEqual(t *testing.T) {
 			exp: false,
 		},
 		{
-			// tuple with a single input edge that has no edgeVal should be
+			// tuple with no edge value and just a single input edge should be
 			// equivalent to just that edge.
 			a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
 				ValueOut(i(1), n("ident")),
@@ -65,6 +65,16 @@ func TestEqual(t *testing.T) {
 			b:   ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
 			exp: true,
 		},
+		{
+			// tuple with an edge value and just a single input edge that has no
+			// edgeVal should be equivalent to just that edge with the tuple's
+			// edge value.
+			a: ZeroGraph.AddValueIn(TupleOut([]OpenEdge{
+				ValueOut(i(1), Value{}),
+			}, n("ident")), n("out")),
+			b:   ZeroGraph.AddValueIn(ValueOut(i(1), n("ident")), n("out")),
+			exp: true,
+		},
 		{
 			a: ZeroGraph.
 				AddValueIn(ValueOut(n("in"), n("incr")), n("out")).
diff --git a/gg/lexer.go b/gg/lexer.go
index 450d5aa..12e10ed 100644
--- a/gg/lexer.go
+++ b/gg/lexer.go
@@ -16,7 +16,7 @@ type LexerError struct {
 }
 
 func (e *LexerError) Error() string {
-	return fmt.Sprintf("%d: %d: %s", e.Col, e.Row, e.Err.Error())
+	return fmt.Sprintf("%d:%d: %s", e.Row, e.Col, e.Err.Error())
 }
 
 func (e *LexerError) Unwrap() error {
diff --git a/gg/lexer_test.go b/gg/lexer_test.go
index 1df7a0d..91e743e 100644
--- a/gg/lexer_test.go
+++ b/gg/lexer_test.go
@@ -9,23 +9,6 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
-type mockReader struct {
-	body []byte
-	err  error
-}
-
-func (r *mockReader) Read(b []byte) (int, error) {
-
-	n := copy(b, r.body)
-	r.body = r.body[n:]
-
-	if len(r.body) == 0 {
-		return n, r.err
-	}
-
-	return n, nil
-}
-
 func TestLexer(t *testing.T) {
 
 	expErr := errors.New("eof")
diff --git a/gg/util_test.go b/gg/util_test.go
new file mode 100644
index 0000000..bd9ebd2
--- /dev/null
+++ b/gg/util_test.go
@@ -0,0 +1,23 @@
+package gg
+
+import "io"
+
+type mockReader struct {
+	body []byte // bytes which have not yet been returned by Read
+	err  error  // returned by Read once body is used up; nil means io.EOF
+}
+
+// Read copies as much of body as fits into b. Once body has been used up it
+// returns err, or io.EOF if err is nil, alongside the final bytes copied.
+func (r *mockReader) Read(b []byte) (int, error) {
+	n := copy(b, r.body)
+	r.body = r.body[n:]
+	if len(r.body) == 0 {
+		if r.err == nil {
+			return n, io.EOF
+		}
+		return n, r.err
+	}
+
+	return n, nil
+}