From 360857d5060274ed795bf8fac1106425ce934683 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Sat, 21 Oct 2023 17:42:31 +0200 Subject: [PATCH] Completely refactor gg with new BNF file and decoder The new gg format is based on a BNF file which can be found in the `gg` directory. The code for decoding `.gg` files has been refactored to mirror that file. The result is more resilient parsing, better errors, and a greater ability to extend the format in the future. The new decoder is notable in that it does not use a lexer. Both lexing and parsing are done in a single step. The format syntax itself has also been modified. Rather than using semi-colons everywhere, commas are used as separators in tuples. Additionally the final comma/semi-colon is no longer required. --- cmd/eval/main.go | 6 +- examples/fib.gg | 33 +-- gg/decoder.go | 362 ++++---------------------------- gg/decoder_test.go | 149 ------------- gg/gg.bnf | 23 ++ gg/gg.go | 68 ++++-- gg/lexer.go | 292 -------------------------- gg/lexer_test.go | 150 ------------- gg/location.go | 36 ++++ gg/term.go | 508 +++++++++++++++++++++++++++++++++++++++++++++ gg/term_test.go | 387 ++++++++++++++++++++++++++++++++++ gg/util_test.go | 23 -- go.mod | 5 +- go.sum | 2 + graph/graph.go | 3 +- vm/function.go | 14 +- vm/vm.go | 14 +- vm/vm_test.go | 6 +- 18 files changed, 1085 insertions(+), 996 deletions(-) delete mode 100644 gg/decoder_test.go create mode 100644 gg/gg.bnf delete mode 100644 gg/lexer.go delete mode 100644 gg/lexer_test.go create mode 100644 gg/location.go create mode 100644 gg/term.go create mode 100644 gg/term_test.go delete mode 100644 gg/util_test.go diff --git a/cmd/eval/main.go b/cmd/eval/main.go index e9ff261..b696508 100644 --- a/cmd/eval/main.go +++ b/cmd/eval/main.go @@ -19,10 +19,7 @@ func main() { opSrc := os.Args[1] inSrc := os.Args[2] - inVal, err := gg.DecodeSingleValueFromLexer( - gg.NewLexer(bytes.NewBufferString(inSrc + ";")), - ) - + inVal, err := gg.NewDecoder(bytes.NewBufferString(inSrc)).Next() if err != nil { panic(fmt.Sprintf("decoding input: %v", err)) } @@ -32,7 +29,6 @@ func main() { vm.Value{Value: inVal}, vm.GlobalScope, ) - if err != nil { panic(fmt.Sprintf("evaluating: %v", err)) } diff --git a/examples/fib.gg b/examples/fib.gg index 38943a7..9c875ff 100644 --- a/examples/fib.gg +++ b/examples/fib.gg @@ -1,19 +1,24 @@ -out = { +* A function which accepts a number N and returns the Nth fibonacci number +{ + * We are passing a tuple of inputs into a graph here, such that the graph is + * evaluated as an anonymous function. That anonymous function uses recur + * internally to compute the result. + out = { - decr = { out = add < (in; -1;); }; + * A little helper function. + decr = { out = add < (in, -1) }; - n = tupEl < (in; 0;); - a = tupEl < (in; 1;); - b = tupEl < (in; 2;); + * Deconstruct the input tuple into its individual elements, for clarity. + * There will be a more ergonomic way of doing this one day. + n = tupEl < (in, 0); + a = tupEl < (in, 1); + b = tupEl < (in, 2); - out = if < ( - isZero < n; - a; - recur < ( - decr < n; - b; - add < (a;b;); + out = if < ( + isZero < n, + a, + recur < ( decr 0 { + d.lastRead = d.unread[len(d.unread)-1] + d.unread = d.unread[:len(d.unread)-1] + return d.lastRead, nil } - var ( - val Value - termed bool - err error - ) - - switch { - - case isPunct(toks[0], punctOpenGraph): - val, toks, termed, err = d.parseGraphValue(toks, true) - - default: - val, toks, termed, err = d.parseSingleValue(toks) - } + loc := d.brNextLoc + r, _, err := d.br.ReadRune() if err != nil { - return nil, nil, err - - } - - if termed { - return graph.ValueOut[Value](ZeroValue, val), toks, nil - } - - opTok, toks := toks[0], toks[1:] - - if !isPunct(opTok, punctOp) { - return nil, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm) - } - - if len(toks) == 0 { - return nil, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp) - } - - oe, toks, err := d.parseOpenEdge(toks) - - if err != nil { - return nil, nil, err - } - - oe = graph.TupleOut[Value](val, oe) - - return oe, toks, nil -} - -func (d *decoder) parseTuple( - toks []LexerToken, -) ( - *OpenEdge, []LexerToken, error, -) { - - openTok, toks := toks[0], toks[1:] - - var edges []*OpenEdge - - for { - - if len(toks) == 0 { - return nil, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple) - - } else if isPunct(toks[0], punctCloseTuple) { - toks = toks[1:] - break - } - - var ( - oe *OpenEdge - err error - ) - - oe, toks, err = d.parseOpenEdge(toks) - - if err != nil { - return nil, nil, err - } - - edges = append(edges, oe) - } - - // this is a quirk of the syntax, _technically_ a tuple doesn't need a - // term after it, since it can't be used as an edge value, and so - // nothing can come after it in the chain. - if len(toks) > 0 && isTerm(toks[0]) { - toks = toks[1:] - } - - return graph.TupleOut[Value](ZeroValue, edges...), toks, nil -} - -// returned boolean value indicates if the token following the graph is a term. -// If a term followed the first token then it is not included in the returned -// leftover tokens. -// -// if termed is false then leftover tokens cannot be empty. -func (d *decoder) parseGraphValue( - toks []LexerToken, expectWrappers bool, -) ( - Value, []LexerToken, bool, error, -) { - - var openTok LexerToken - - if expectWrappers { - openTok, toks = toks[0], toks[1:] - } - - g := new(Graph) - - for { - - if len(toks) == 0 { - - if !expectWrappers { - break - } - - return ZeroValue, nil, false, decoderErrf(openTok, "no matching %q", punctCloseGraph) - - } else if closingTok := toks[0]; isPunct(closingTok, punctCloseGraph) { - - if !expectWrappers { - return ZeroValue, nil, false, decoderErrf(closingTok, "unexpected %q", punctCloseGraph) - } - - toks = toks[1:] - - if len(toks) == 0 { - return ZeroValue, nil, false, decoderErrf(closingTok, "cannot be final token, possibly missing %q", punctTerm) - } - - break - } - - var err error - - if g, toks, err = d.parseValIn(g, toks); err != nil { - return ZeroValue, nil, false, err - } - } - - val := Value{Graph: g} - - if !expectWrappers { - return val, toks, true, nil + return d.lastRead, err } - val.LexerToken = &openTok - - termed := isTerm(toks[0]) - - if termed { - toks = toks[1:] + if r == '\n' { + d.brNextLoc.Row++ + d.brNextLoc.Col = 1 + } else { + d.brNextLoc.Col++ } - return val, toks, termed, nil + d.lastRead = locatableRune{loc, r} + return d.lastRead, nil } -func (d *decoder) parseValIn(into *Graph, toks []LexerToken) (*Graph, []LexerToken, error) { - - if len(toks) == 0 { - return into, nil, nil - - } else if len(toks) < 3 { - return nil, nil, decoderErrf(toks[0], `must be of the form " = ..."`) +func (d *Decoder) unreadRune(lr locatableRune) { + if d.lastRead != lr { + panic(fmt.Sprintf( + "unreading rune %#v, but last read rune was %#v", lr, d.lastRead, + )) } - dst := toks[0] - eq := toks[1] - toks = toks[2:] - - if dst.Kind != LexerTokenKindName { - return nil, nil, decoderErrf(dst, "must be a name") - - } else if !isPunct(eq, punctAssign) { - return nil, nil, decoderErrf(eq, "must be %q", punctAssign) - } - - oe, toks, err := d.parseOpenEdge(toks) - - if err != nil { - return nil, nil, err - } - - dstVal := Value{Name: &dst.Value, LexerToken: &dst} - - return into.AddValueIn(dstVal, oe), toks, nil + d.unread = append(d.unread, lr) } -func (d *decoder) readAllTokens(lexer Lexer) ([]LexerToken, error) { - - var toks []LexerToken - - for { - - tok, err := lexer.Next() - - if errors.Is(err, io.EOF) { - break - - } else if err != nil { - return nil, fmt.Errorf("reading next token: %w", err) - } - - toks = append(toks, tok) +func (d *Decoder) nextLoc() Location { + if len(d.unread) > 0 { + return d.unread[len(d.unread)-1].Location } - return toks, nil + return d.brNextLoc } -func (d *decoder) decode(lexer Lexer) (*Graph, error) { - - toks, err := d.readAllTokens(lexer) - - if err != nil { - return nil, err - } - - val, _, _, err := d.parseGraphValue(toks, false) - - if err != nil { - return nil, err - } - - return val.Graph, nil -} - -// DecodeLexer reads lexigraphical tokens from the given Lexer and uses them to -// construct a Graph according to the rules of the gg file format. DecodeLexer -// will only return an error if there is a non-EOF file returned from the Lexer, -// or the tokens read cannot be used to construct a valid Graph. -func DecodeLexer(lexer Lexer) (*Graph, error) { - decoder := &decoder{} - return decoder.decode(lexer) -} - -func DecodeSingleValueFromLexer(lexer Lexer) (Value, error) { - decoder := &decoder{} - - toks, err := decoder.readAllTokens(lexer) - - if err != nil { - return ZeroValue, err - } - - val, _, _, err := decoder.parseSingleValue(toks) - - return val, err +// Next returns the next top-level value in the stream, or io.EOF. +func (d *Decoder) Next() (Value, error) { + return topLevelTerm.decodeFn(d) } diff --git a/gg/decoder_test.go b/gg/decoder_test.go deleted file mode 100644 index 417668b..0000000 --- a/gg/decoder_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package gg - -import ( - "strconv" - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/mediocregopher/ginger/graph" -) - -func TestDecoder(t *testing.T) { - - zeroGraph := new(Graph) - - i := func(i int64) Value { - return Value{Number: &i} - } - - n := func(n string) Value { - return Value{Name: &n} - } - - vOut := func(edgeVal, val Value) *OpenEdge { - return graph.ValueOut(edgeVal, val) - } - - tOut := func(edgeVal Value, ins ...*OpenEdge) *OpenEdge { - return graph.TupleOut(edgeVal, ins...) - } - - tests := []struct { - in string - exp *Graph - }{ - { - in: "", - exp: zeroGraph, - }, - { - in: "out = 1;", - exp: zeroGraph.AddValueIn(n("out"), vOut(ZeroValue, i(1))), - }, - { - in: "out = incr < 1;", - exp: zeroGraph.AddValueIn(n("out"), vOut(n("incr"), i(1))), - }, - { - in: "out = a < b < 1;", - exp: zeroGraph.AddValueIn( - n("out"), - tOut( - n("a"), - vOut(n("b"), - i(1)), - ), - ), - }, - { - in: "out = a < b < (1; c < 2; d < e < 3;);", - exp: zeroGraph.AddValueIn( - n("out"), - tOut( - n("a"), - tOut( - n("b"), - vOut(ZeroValue, i(1)), - vOut(n("c"), i(2)), - tOut( - n("d"), - vOut(n("e"), i(3)), - ), - ), - ), - ), - }, - { - in: "out = a < b < (1; c < (d < 2; 3;); );", - exp: zeroGraph.AddValueIn( - n("out"), - tOut( - n("a"), - tOut( - n("b"), - vOut(ZeroValue, i(1)), - tOut( - n("c"), - vOut(n("d"), i(2)), - vOut(ZeroValue, i(3)), - ), - ), - ), - ), - }, - { - in: "out = { a = 1; b = c < d < 2; };", - exp: zeroGraph.AddValueIn( - n("out"), - vOut( - ZeroValue, - Value{Graph: zeroGraph. - AddValueIn(n("a"), vOut(ZeroValue, i(1))). - AddValueIn( - n("b"), - tOut( - n("c"), - vOut(n("d"), i(2)), - ), - ), - }, - ), - ), - }, - { - in: "out = a < { b = 1; } < 2;", - exp: zeroGraph.AddValueIn( - n("out"), - tOut( - n("a"), - vOut( - Value{Graph: zeroGraph. - AddValueIn(n("b"), vOut(ZeroValue, i(1))), - }, - i(2), - ), - ), - ), - }, - { - in: "a = 1; b = 2;", - exp: zeroGraph. - AddValueIn(n("a"), vOut(ZeroValue, i(1))). - AddValueIn(n("b"), vOut(ZeroValue, i(2))), - }, - } - - for i, test := range tests { - t.Run(strconv.Itoa(i), func(t *testing.T) { - - r := &mockReader{body: []byte(test.in)} - lexer := NewLexer(r) - - got, err := DecodeLexer(lexer) - assert.NoError(t, err) - assert.True(t, got.Equal(test.exp), "\nexp:%v\ngot:%v", test.exp, got) - - }) - } -} diff --git a/gg/gg.bnf b/gg/gg.bnf new file mode 100644 index 0000000..a625749 --- /dev/null +++ b/gg/gg.bnf @@ -0,0 +1,23 @@ + ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + ::= + + ::= "-" + ::= | + + ::= ( | ) ( | | )* + + ::= "(" + ::= ")" | + ::= + | + ::= ")" | "," + ::= | "<" + + ::= "{" + ::= "}" | "=" + ::= + | + ::= "}" | ";" + ::= | "<" + + ::= | | + ::= | diff --git a/gg/gg.go b/gg/gg.go index 99e7cc1..c57d9f9 100644 --- a/gg/gg.go +++ b/gg/gg.go @@ -7,23 +7,20 @@ import ( "github.com/mediocregopher/ginger/graph" ) -// ZeroValue is a Value with no fields set. -var ZeroValue Value +// Type aliases for convenience +type ( + Graph = graph.Graph[OptionalValue, Value] + OpenEdge = graph.OpenEdge[OptionalValue, Value] +) // Value represents a value which can be serialized by the gg text format. type Value struct { + Location // Only one of these fields may be set Name *string Number *int64 Graph *Graph - - // TODO coming soon! - // String *string - - // Optional fields indicating the token which was used to construct this - // Value, if any. - LexerToken *LexerToken } // Name returns a name Value. @@ -36,12 +33,6 @@ func Number(n int64) Value { return Value{Number: &n} } -// IsZero returns true if the Value is the zero value (none of the sub-value -// fields are set). LexerToken is ignored for this check. -func (v Value) IsZero() bool { - return v.Equal(ZeroValue) -} - // Equal returns true if the passed in Value is equivalent, ignoring the // LexerToken on either Value. // @@ -50,13 +41,8 @@ func (v Value) Equal(v2g graph.Value) bool { v2 := v2g.(Value) - v.LexerToken, v2.LexerToken = nil, nil - switch { - case v == ZeroValue && v2 == ZeroValue: - return true - case v.Name != nil && v2.Name != nil && *v.Name == *v2.Name: return true @@ -85,6 +71,46 @@ func (v Value) String() string { return v.Graph.String() default: - return "" + panic("no fields set on Value") } } + +// OptionalValue is a Value which may be unset. This is used for edge values, +// since edges might not have a value. +type OptionalValue struct { + Value + Valid bool +} + +// None is the zero OptionalValue (hello rustaceans). +var None OptionalValue + +// Some wraps a Value to be an OptionalValue. +func Some(v Value) OptionalValue { + return OptionalValue{Valid: true, Value: v} +} + +func (v OptionalValue) String() string { + if !v.Valid { + return "" + } + return v.Value.String() +} + +func (v OptionalValue) Equal(v2g graph.Value) bool { + var v2 OptionalValue + + if v2Val, ok := v2g.(Value); ok { + v2 = Some(v2Val) + } else { + v2 = v2g.(OptionalValue) + } + + if v.Valid != v2.Valid { + return false + } else if !v.Valid { + return true + } + + return v.Value.Equal(v2.Value) +} diff --git a/gg/lexer.go b/gg/lexer.go deleted file mode 100644 index 8acbdbc..0000000 --- a/gg/lexer.go +++ /dev/null @@ -1,292 +0,0 @@ -package gg - -import ( - "bufio" - "fmt" - "io" - "strings" - "unicode" -) - -// LexerLocation describes the location in a file where a particular token was -// parsed from. -type LexerLocation struct { - Row, Col int -} - -func (l LexerLocation) String() string { - return fmt.Sprintf("%d:%d", l.Row, l.Col) -} - -// LexerError is returned by Lexer when an unexpected error occurs parsing a -// stream of LexerTokens. -type LexerError struct { - Err error - - Location LexerLocation -} - -func (e *LexerError) Error() string { - return fmt.Sprintf("%s: %s", e.Location.String(), e.Err.Error()) -} - -func (e *LexerError) Unwrap() error { - return e.Err -} - -// LexerTokenKind enumerates the different kinds of LexerToken there can be. -type LexerTokenKind string - -// Enumeration of LexerTokenKinds. -const ( - LexerTokenKindName LexerTokenKind = "name" - LexerTokenKindNumber LexerTokenKind = "number" - LexerTokenKindPunctuation LexerTokenKind = "punctuation" -) - -// LexerToken describes a lexigraphical token which is used when deserializing -// Graphs. -type LexerToken struct { - Kind LexerTokenKind - Value string // never empty string - - Location LexerLocation -} - -func (t LexerToken) errPrefix() string { - return fmt.Sprintf("%s: at %q", t.Location.String(), t.Value) -} - -// Lexer is used to parse a string stream into a sequence of tokens which can -// then be parsed by a Parser. -type Lexer interface { - - // Next will return a LexerToken or a LexerError. io.EOF (wrapped in a - // LexerError) is returned if the stream being read from is finished. - Next() (LexerToken, error) -} - -type lexer struct { - r *bufio.Reader - stringBuilder *strings.Builder - err *LexerError - - // these fields are only needed to keep track of the current "cursor" - // position when reading. - lastRow, lastCol int - prevRune rune -} - -// NewLexer wraps the io.Reader in a Lexer, which will read the io.Reader as a -// sequence of utf-8 characters and parse it into a sequence of LexerTokens. -func NewLexer(r io.Reader) Lexer { - return &lexer{ - r: bufio.NewReader(r), - lastRow: 0, - lastCol: -1, - stringBuilder: new(strings.Builder), - } -} - -// nextRowCol returns the row and column number which the next rune in the -// stream would be at. -func (l *lexer) nextRowCol() (int, int) { - - if l.prevRune == '\n' { - return l.lastRow + 1, 0 - } - - return l.lastRow, l.lastCol + 1 -} - -func (l *lexer) fmtErr(err error) *LexerError { - - row, col := l.nextRowCol() - - return &LexerError{ - Err: err, - Location: LexerLocation{ - Row: row, - Col: col, - }, - } -} - -func (l *lexer) fmtErrf(str string, args ...interface{}) *LexerError { - return l.fmtErr(fmt.Errorf(str, args...)) -} - -// discardRune must _always_ be called only after peekRune. -func (l *lexer) discardRune() { - - r, _, err := l.r.ReadRune() - - if err != nil { - panic(err) - } - - l.lastRow, l.lastCol = l.nextRowCol() - l.prevRune = r -} - -func (l *lexer) peekRune() (rune, error) { - - r, _, err := l.r.ReadRune() - - if err != nil { - return '0', err - - } else if err := l.r.UnreadRune(); err != nil { - - // since the most recent operation on the bufio.Reader was a ReadRune, - // UnreadRune should never return an error - panic(err) - } - - return r, nil -} - -// readWhile reads runes until the given predicate returns false, and returns a -// LexerToken of the given kind whose Value is comprised of all runes which -// returned true. -// -// If an error is encountered then both the token (or what's been parsed of it -// so far) and the error are returned. -func (l *lexer) readWhile( - kind LexerTokenKind, pred func(rune) bool, -) ( - LexerToken, *LexerError, -) { - - row, col := l.nextRowCol() - - l.stringBuilder.Reset() - - var lexErr *LexerError - - for { - - r, err := l.peekRune() - - if err != nil { - lexErr = l.fmtErrf("peeking next character: %w", err) - break - - } else if !pred(r) { - break - } - - l.stringBuilder.WriteRune(r) - - l.discardRune() - } - - return LexerToken{ - Kind: kind, - Value: l.stringBuilder.String(), - Location: LexerLocation{ - Row: row, Col: col, - }, - }, lexErr -} - -// we only support base-10 integers at the moment. -func isNumber(r rune) bool { - return r == '-' || ('0' <= r && r <= '9') -} - -// next can return a token, an error, or both. If an error is returned then no -// further calls to next should occur. -func (l *lexer) next() (LexerToken, *LexerError) { - - for { - - r, err := l.peekRune() - - if err != nil { - return LexerToken{}, l.fmtErrf("peeking next character: %w", err) - } - - switch { - - case r == '*': // comment - - // comments are everything up until a newline - _, err := l.readWhile("", func(r rune) bool { - return r != '\n' - }) - - if err != nil { - return LexerToken{}, err - } - - // terminating newline will be discarded on next loop - - case r == '"' || r == '`': - - // reserve double-quote and backtick for string parsing. - l.discardRune() - return LexerToken{}, l.fmtErrf("string parsing not yet implemented") - - case unicode.IsLetter(r): - // letters denote the start of a name - - return l.readWhile(LexerTokenKindName, func(r rune) bool { - - if unicode.In(r, unicode.Letter, unicode.Number, unicode.Mark) { - return true - } - - if r == '-' { - return true - } - - return false - }) - - case isNumber(r): - return l.readWhile(LexerTokenKindNumber, isNumber) - - case unicode.IsPunct(r) || unicode.IsSymbol(r): - // symbols are also considered punctuation - - l.discardRune() - - return LexerToken{ - Kind: LexerTokenKindPunctuation, - Value: string(r), - Location: LexerLocation{ - Row: l.lastRow, - Col: l.lastCol, - }, - }, nil - - case unicode.IsSpace(r): - l.discardRune() - - default: - return LexerToken{}, l.fmtErrf("unexpected character %q", r) - } - - } -} - -func (l *lexer) Next() (LexerToken, error) { - - if l.err != nil { - return LexerToken{}, l.err - } - - tok, err := l.next() - - if err != nil { - - l.err = err - - if tok.Kind == "" { - return LexerToken{}, l.err - } - } - - return tok, nil -} diff --git a/gg/lexer_test.go b/gg/lexer_test.go deleted file mode 100644 index 038dba0..0000000 --- a/gg/lexer_test.go +++ /dev/null @@ -1,150 +0,0 @@ -package gg - -import ( - "errors" - "strconv" - "strings" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestLexer(t *testing.T) { - - expErr := errors.New("eof") - - tests := []struct { - in string - exp []LexerToken - }{ - {in: "", exp: []LexerToken{}}, - {in: "* fooo\n", exp: []LexerToken{}}, - { - in: "foo", - exp: []LexerToken{ - { - Kind: LexerTokenKindName, - Value: "foo", - Location: LexerLocation{Row: 0, Col: 0}, - }, - }, - }, - { - in: "foo bar\nf-o f0O Foo", - exp: []LexerToken{ - { - Kind: LexerTokenKindName, - Value: "foo", - Location: LexerLocation{Row: 0, Col: 0}, - }, - { - Kind: LexerTokenKindName, - Value: "bar", - Location: LexerLocation{Row: 0, Col: 4}, - }, - { - Kind: LexerTokenKindName, - Value: "f-o", - Location: LexerLocation{Row: 1, Col: 0}, - }, - { - Kind: LexerTokenKindName, - Value: "f0O", - Location: LexerLocation{Row: 1, Col: 4}, - }, - { - Kind: LexerTokenKindName, - Value: "Foo", - Location: LexerLocation{Row: 1, Col: 8}, - }, - }, - }, - { - in: "1 100 -100", - exp: []LexerToken{ - { - Kind: LexerTokenKindNumber, - Value: "1", - Location: LexerLocation{Row: 0, Col: 0}, - }, - { - Kind: LexerTokenKindNumber, - Value: "100", - Location: LexerLocation{Row: 0, Col: 2}, - }, - { - Kind: LexerTokenKindNumber, - Value: "-100", - Location: LexerLocation{Row: 0, Col: 6}, - }, - }, - }, - { - in: "1<2!-3 ()", - exp: []LexerToken{ - { - Kind: LexerTokenKindNumber, - Value: "1", - Location: LexerLocation{Row: 0, Col: 0}, - }, - { - Kind: LexerTokenKindPunctuation, - Value: "<", - Location: LexerLocation{Row: 0, Col: 1}, - }, - { - Kind: LexerTokenKindNumber, - Value: "2", - Location: LexerLocation{Row: 0, Col: 2}, - }, - { - Kind: LexerTokenKindPunctuation, - Value: "!", - Location: LexerLocation{Row: 0, Col: 3}, - }, - { - Kind: LexerTokenKindNumber, - Value: "-3", - Location: LexerLocation{Row: 0, Col: 4}, - }, - { - Kind: LexerTokenKindPunctuation, - Value: "(", - Location: LexerLocation{Row: 0, Col: 7}, - }, - { - Kind: LexerTokenKindPunctuation, - Value: ")", - Location: LexerLocation{Row: 0, Col: 8}, - }, - }, - }, - } - - for i, test := range tests { - t.Run(strconv.Itoa(i), func(t *testing.T) { - - lexer := NewLexer(&mockReader{body: []byte(test.in), err: expErr}) - - for i := range test.exp { - tok, err := lexer.Next() - assert.NoError(t, err) - assert.Equal(t, test.exp[i], tok, "test.exp[%d]", i) - } - - tok, err := lexer.Next() - assert.ErrorIs(t, err, expErr) - assert.Equal(t, LexerToken{}, tok) - - lexErr := new(LexerError) - assert.True(t, errors.As(err, &lexErr)) - - inParts := strings.Split(test.in, "\n") - - assert.ErrorIs(t, lexErr, expErr) - assert.Equal(t, lexErr.Location.Row, len(inParts)-1) - assert.Equal(t, lexErr.Location.Col, len(inParts[len(inParts)-1])) - }) - } - -} diff --git a/gg/location.go b/gg/location.go new file mode 100644 index 0000000..a1de1aa --- /dev/null +++ b/gg/location.go @@ -0,0 +1,36 @@ +package gg + +import "fmt" + +// Location indicates a position in a stream of bytes identified by column +// within newline-separated rows. +type Location struct { + Row, Col int +} + +func (l Location) errf(str string, args ...any) LocatedError { + return LocatedError{l, fmt.Errorf(str, args...)} +} + +func (l Location) locate() Location { return l } + +// LocatedError is an error related to a specific point within a decode gg +// stream. +type LocatedError struct { + Location + Err error +} + +func (e LocatedError) Error() string { + return fmt.Sprintf("%d:%d: %v", e.Row, e.Col, e.Err) +} + +type locatableRune struct { + Location + r rune +} + +type locatableString struct { + Location + str string +} diff --git a/gg/term.go b/gg/term.go new file mode 100644 index 0000000..fa43dbd --- /dev/null +++ b/gg/term.go @@ -0,0 +1,508 @@ +package gg + +import ( + "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + + "github.com/mediocregopher/ginger/graph" + "golang.org/x/exp/slices" +) + +var ( + errNoMatch = errors.New("not found") +) + +type stringerFn func() string + +func (fn stringerFn) String() string { + return fn() +} + +type stringerStr string + +func (str stringerStr) String() string { + return string(str) +} + +type term[T any] struct { + name fmt.Stringer + decodeFn func(d *Decoder) (T, error) +} + +func (t term[T]) String() string { + return t.name.String() +} + +func firstOf[T any](terms ...*term[T]) *term[T] { + if len(terms) < 2 { + panic("firstOfTerms requires at least 2 terms") + } + + return &term[T]{ + name: stringerFn(func() string { + descrs := make([]string, len(terms)) + for i := range terms { + descrs[i] = terms[i].String() + } + return strings.Join(descrs, " or ") + }), + decodeFn: func(d *Decoder) (T, error) { + var zero T + for _, t := range terms { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + continue + } else if err != nil { + return zero, err + } + + return v, nil + } + + return zero, errNoMatch + }, + } +} + +func seq[Ta, Tb, Tc any]( + name fmt.Stringer, + termA *term[Ta], + termB *term[Tb], + fn func(Ta, Tb) Tc, +) *term[Tc] { + return &term[Tc]{ + name: name, + decodeFn: func(d *Decoder) (Tc, error) { + var zero Tc + + va, err := termA.decodeFn(d) + if err != nil { + return zero, err + } + + vb, err := termB.decodeFn(d) + if errors.Is(err, errNoMatch) { + return zero, d.nextLoc().errf("expected %v", termB) + } else if err != nil { + return zero, err + } + + return fn(va, vb), nil + }, + } +} + +func prefixed[Ta, Tb any](termA *term[Ta], termB *term[Tb]) *term[Tb] { + return seq(termA, termA, termB, func(_ Ta, b Tb) Tb { + return b + }) +} + +func prefixIgnored[Ta, Tb any](termA *term[Ta], termB *term[Tb]) *term[Tb] { + return &term[Tb]{ + name: termB, + decodeFn: func(d *Decoder) (Tb, error) { + var zero Tb + if _, err := termA.decodeFn(d); err != nil { + return zero, err + } + return termB.decodeFn(d) + }, + } +} + +func suffixIgnored[Ta, Tb any]( + termA *term[Ta], termB *term[Tb], +) *term[Ta] { + return seq(termA, termA, termB, func(a Ta, _ Tb) Ta { + return a + }) +} + +func oneOrMore[T any](t *term[T]) *term[[]T] { + return &term[[]T]{ + name: stringerFn(func() string { + return fmt.Sprintf("one or more %v", t) + }), + decodeFn: func(d *Decoder) ([]T, error) { + var vv []T + for { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + break + } else if err != nil { + return nil, err + } + + vv = append(vv, v) + } + + if len(vv) == 0 { + return nil, errNoMatch + } + + return vv, nil + }, + } +} + +func zeroOrMore[T any](t *term[T]) *term[[]T] { + return &term[[]T]{ + name: stringerFn(func() string { + return fmt.Sprintf("zero or more %v", t) + }), + decodeFn: func(d *Decoder) ([]T, error) { + var vv []T + for { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + break + } else if err != nil { + return nil, err + } + + vv = append(vv, v) + } + + return vv, nil + }, + } +} + +func mapTerm[Ta, Tb any]( + name fmt.Stringer, t *term[Ta], fn func(Ta) Tb, +) *term[Tb] { + return &term[Tb]{ + name: name, + decodeFn: func(d *Decoder) (Tb, error) { + var zero Tb + va, err := t.decodeFn(d) + if err != nil { + return zero, err + } + return fn(va), nil + }, + } +} + +func runePredTerm( + name fmt.Stringer, pred func(rune) bool, +) *term[locatableRune] { + return &term[locatableRune]{ + name: name, + decodeFn: func(d *Decoder) (locatableRune, error) { + lr, err := d.readRune() + if errors.Is(err, io.EOF) { + return locatableRune{}, errNoMatch + } else if err != nil { + return locatableRune{}, err + } + + if !pred(lr.r) { + d.unreadRune(lr) + return locatableRune{}, errNoMatch + } + + return lr, nil + }, + } +} + +func runeTerm(r rune) *term[locatableRune] { + return runePredTerm( + stringerStr(fmt.Sprintf("'%c'", r)), + func(r2 rune) bool { return r2 == r }, + ) +} + +func locatableRunesToString(rr []locatableRune) string { + str := make([]rune, len(rr)) + for i := range rr { + str[i] = rr[i].r + } + return string(str) +} + +func runesToStringTerm( + t *term[[]locatableRune], +) *term[locatableString] { + return mapTerm( + t, t, func(rr []locatableRune) locatableString { + return locatableString{rr[0].locate(), locatableRunesToString(rr)} + }, + ) +} + +func discard[T any](t *term[T]) *term[struct{}] { + return mapTerm(t, t, func(_ T) struct{} { return struct{}{} }) +} + +var ( + notNewlineTerm = runePredTerm( + stringerStr("not-newline"), + func(r rune) bool { return r != '\n' }, + ) + + commentTerm = prefixed( + prefixed(runeTerm('*'), zeroOrMore(notNewlineTerm)), + runeTerm('\n'), + ) + + whitespaceTerm = zeroOrMore(firstOf( + discard(runePredTerm(stringerStr("whitespace"), unicode.IsSpace)), + discard(commentTerm), + )) +) + +func trimmedTerm[T any](t *term[T]) *term[T] { + t = prefixIgnored(whitespaceTerm, t) + t = suffixIgnored(t, whitespaceTerm) + return t +} + +func trimmedRuneTerm(r rune) *term[locatableRune] { + return trimmedTerm(runeTerm(r)) +} + +var ( + digitTerm = runePredTerm( + stringerStr("digit"), + func(r rune) bool { return '0' <= r && r <= '9' }, + ) + + positiveNumberTerm = runesToStringTerm(oneOrMore(digitTerm)) + + negativeNumberTerm = seq( + stringerStr("negative-number"), + runeTerm('-'), + positiveNumberTerm, + func(neg locatableRune, posNum locatableString) locatableString { + return locatableString{neg.locate(), string(neg.r) + posNum.str} + }, + ) + + numberTerm = mapTerm( + stringerStr("number"), + firstOf(negativeNumberTerm, positiveNumberTerm), + func(str locatableString) Value { + i, err := strconv.ParseInt(str.str, 10, 64) + if err != nil { + panic(fmt.Errorf("parsing %q as int: %w", str, err)) + } + + return Value{Number: &i, Location: str.locate()} + }, + ) +) + +var ( + letterTerm = runePredTerm( + stringerStr("letter"), + func(r rune) bool { + return unicode.In(r, unicode.Letter, unicode.Mark) + }, + ) + + letterTailTerm = zeroOrMore(firstOf(letterTerm, digitTerm)) + + nameTerm = seq( + stringerStr("name"), + letterTerm, + letterTailTerm, + func(head locatableRune, tail []locatableRune) Value { + name := string(head.r) + locatableRunesToString(tail) + return Value{Name: &name, Location: head.locate()} + }, + ) +) + +func openEdgeIntoValue(val Value, oe *OpenEdge) *OpenEdge { + switch { + case oe == nil: + return graph.ValueOut(None, val) + case !oe.EdgeValue().Valid: + return oe.WithEdgeValue(Some(val)) + default: + return graph.TupleOut(Some(val), oe) + } +} + +var graphTerm, valueTerm = func() (*term[Value], *term[Value]) { + type tupleState struct { + ins []*OpenEdge + oe *OpenEdge + } + + type graphState struct { + g *Graph + oe *OpenEdge + } + + var ( + rightParenthesis = trimmedRuneTerm(')') + tupleEndTerm = mapTerm( + rightParenthesis, + rightParenthesis, + func(lr locatableRune) tupleState { + // if ')', then map that to an empty state. This acts as a + // sentinel value to indicate "end of tuple". + return tupleState{} + }, + ) + + rightCurlyBrace = trimmedRuneTerm('}') + graphEndTerm = mapTerm( + rightCurlyBrace, + rightCurlyBrace, + func(lr locatableRune) graphState { + // if '}', then map that to an empty state. This acts as a + // sentinel value to indicate "end of graph". + return graphState{} + }, + ) + ) + + var ( + // pre-define these, and then fill in the pointers after, in order to + // deal with recursive dependencies between them. + valueTerm = new(term[Value]) + + tupleTerm = new(term[*OpenEdge]) + tupleTailTerm = new(term[tupleState]) + tupleOpenEdgeTerm = new(term[tupleState]) + tupleOpenEdgeTailTerm = new(term[tupleState]) + tupleOpenEdgeValueTailTerm = new(term[tupleState]) + + graphTerm = new(term[Value]) + graphTailTerm = new(term[graphState]) + graphOpenEdgeTerm = new(term[graphState]) + graphOpenEdgeTailTerm = new(term[graphState]) + graphOpenEdgeValueTailTerm = new(term[graphState]) + ) + + *tupleTerm = *seq( + stringerStr("tuple"), + trimmedRuneTerm('('), + tupleTailTerm, + func(lr locatableRune, ts tupleState) *OpenEdge { + slices.Reverse(ts.ins) + return graph.TupleOut(None, ts.ins...) + }, + ) + + *tupleTailTerm = *firstOf( + tupleEndTerm, + mapTerm( + tupleOpenEdgeTerm, + tupleOpenEdgeTerm, + func(ts tupleState) tupleState { + ts.ins = append(ts.ins, ts.oe) + ts.oe = nil + return ts + }, + ), + ) + + *tupleOpenEdgeTerm = *firstOf( + seq( + valueTerm, + valueTerm, + tupleOpenEdgeValueTailTerm, + func(val Value, ts tupleState) tupleState { + ts.oe = openEdgeIntoValue(val, ts.oe) + return ts + }, + ), + seq( + tupleTerm, + tupleTerm, + tupleOpenEdgeTailTerm, + func(oe *OpenEdge, ts tupleState) tupleState { + ts.oe = oe + return ts + }, + ), + ) + + *tupleOpenEdgeTailTerm = *firstOf( + tupleEndTerm, + prefixed(trimmedRuneTerm(','), tupleTailTerm), + ) + + *tupleOpenEdgeValueTailTerm = *firstOf( + tupleOpenEdgeTailTerm, + prefixed(trimmedRuneTerm('<'), tupleOpenEdgeTerm), + ) + + *graphTerm = *seq( + stringerStr("graph"), + trimmedRuneTerm('{'), + graphTailTerm, + func(lr locatableRune, gs graphState) Value { + if gs.g == nil { + gs.g = new(Graph) + } + + return Value{Graph: gs.g, Location: lr.locate()} + }, + ) + + *graphTailTerm = *firstOf( + graphEndTerm, + seq( + nameTerm, + nameTerm, + prefixed(trimmedRuneTerm('='), graphOpenEdgeTerm), + func(name Value, gs graphState) graphState { + if gs.g == nil { + gs.g = new(Graph) + } + + gs.g = gs.g.AddValueIn(name, gs.oe) + gs.oe = nil + return gs + }, + ), + ) + + *graphOpenEdgeTerm = *firstOf( + seq( + valueTerm, + valueTerm, + graphOpenEdgeValueTailTerm, + func(val Value, gs graphState) graphState { + gs.oe = openEdgeIntoValue(val, gs.oe) + return gs + }, + ), + seq( + tupleTerm, + tupleTerm, + graphOpenEdgeTailTerm, + func(oe *OpenEdge, gs graphState) graphState { + gs.oe = oe + return gs + }, + ), + ) + + *graphOpenEdgeTailTerm = *firstOf( + graphEndTerm, + prefixed(trimmedRuneTerm(';'), graphTailTerm), + ) + + *graphOpenEdgeValueTailTerm = *firstOf( + graphOpenEdgeTailTerm, + prefixed(trimmedRuneTerm('<'), graphOpenEdgeTerm), + ) + + *valueTerm = *firstOf(nameTerm, numberTerm, graphTerm) + + return graphTerm, valueTerm +}() + +var topLevelTerm = trimmedTerm(valueTerm) diff --git a/gg/term_test.go b/gg/term_test.go new file mode 100644 index 0000000..80a041e --- /dev/null +++ b/gg/term_test.go @@ -0,0 +1,387 @@ +package gg + +import ( + "bytes" + "io" + "strconv" + "testing" + + "github.com/mediocregopher/ginger/graph" + "github.com/stretchr/testify/assert" +) + +func decoderLeftover(d *Decoder) string { + unread := make([]rune, len(d.unread)) + for i := range unread { + unread[i] = d.unread[i].r + } + + rest, err := io.ReadAll(d.br) + if err != nil { + panic(err) + } + return string(unread) + string(rest) +} + +func TestTermDecoding(t *testing.T) { + type test struct { + in string + exp Value + expErr string + leftover string + } + + runTests := func( + t *testing.T, name string, term *term[Value], tests []test, + ) { + t.Run(name, func(t *testing.T) { + for i, test := range tests { + t.Run(strconv.Itoa(i), func(t *testing.T) { + dec := NewDecoder(bytes.NewBufferString(test.in)) + got, err := term.decodeFn(dec) + if test.expErr != "" { + assert.Error(t, err) + assert.Equal(t, test.expErr, err.Error()) + } else if assert.NoError(t, err) { + assert.True(t, + test.exp.Equal(got), + "\nexp:%v\ngot:%v", test.exp, got, + ) + assert.Equal(t, test.leftover, decoderLeftover(dec)) + } + }) + } + }) + } + + expNum := func(row, col int, n int64) Value { + return Value{Number: &n, Location: Location{row, col}} + } + + runTests(t, "number", numberTerm, []test{ + {in: `0`, exp: expNum(1, 1, 0)}, + {in: `100`, exp: expNum(1, 1, 100)}, + {in: `-100`, exp: expNum(1, 1, -100)}, + {in: `0foo`, exp: expNum(1, 1, 0), leftover: "foo"}, + {in: `100foo`, exp: expNum(1, 1, 100), leftover: "foo"}, + }) + + expName := func(row, col int, name string) Value { + return Value{Name: &name, Location: Location{row, col}} + } + + expGraph := func(row, col int, g *Graph) Value { + return Value{Graph: g, Location: Location{row, col}} + } + + runTests(t, "name", nameTerm, []test{ + {in: `a`, exp: expName(1, 1, "a")}, + {in: `ab`, exp: expName(1, 1, "ab")}, + {in: `ab2c`, exp: expName(1, 1, "ab2c")}, + {in: `ab2c,`, exp: expName(1, 1, "ab2c"), leftover: ","}, + }) + + runTests(t, "graph", graphTerm, []test{ + {in: `{}`, exp: expGraph(1, 1, new(Graph))}, + {in: `{`, expErr: `1:2: expected '}' or name`}, + {in: `{a}`, expErr: `1:3: expected '='`}, + {in: `{a=}`, expErr: `1:4: expected name or number or graph or tuple`}, + { + in: `{foo=a}`, + exp: expGraph( + 1, 1, new(Graph). + AddValueIn( + expName(2, 1, "foo"), + graph.ValueOut(None, expName(6, 1, "a")), + ), + ), + }, + { + in: `{ foo = a }`, + exp: expGraph( + 1, 1, new(Graph). + AddValueIn( + expName(2, 1, "foo"), + graph.ValueOut(None, expName(6, 1, "a")), + ), + ), + }, + {in: `{1=a}`, expErr: `1:2: expected '}' or name`}, + {in: `{foo=a ,}`, expErr: `1:8: expected '}' or ';' or '<'`}, + {in: `{foo=a`, expErr: `1:7: expected '}' or ';' or '<'`}, + { + in: `{foo=a" - case !v.Value.IsZero(): + case v.Value != (gg.Value{}): return v.Value.String() default: @@ -106,15 +107,14 @@ func (v Value) String() string { // scope contains pre-defined operations and values which are available during // the evaluation. func EvaluateSource(opSrc io.Reader, input Value, scope Scope) (Value, error) { - lexer := gg.NewLexer(opSrc) - - g, err := gg.DecodeLexer(lexer) + v, err := gg.NewDecoder(opSrc).Next() if err != nil { return Value{}, err + } else if v.Graph == nil { + return Value{}, errors.New("value must be a graph") } - fn, err := FunctionFromGraph(g, scope.NewScope()) - + fn, err := FunctionFromGraph(v.Graph, scope.NewScope()) if err != nil { return Value{}, err } diff --git a/vm/vm_test.go b/vm/vm_test.go index 456c37d..9a0a706 100644 --- a/vm/vm_test.go +++ b/vm/vm_test.go @@ -10,11 +10,11 @@ import ( func TestVM(t *testing.T) { - src := ` - incr = { out = add < (1; in;); }; + src := `{ + incr = { out = add < (1, in); }; out = incr < incr < in; - ` + }` var in int64 = 5