ginger/gg/decoder.go
Brian Picciano 33e59a3836 Implement Decoder
The decoder basically works, though there's some quirks in the design
I'll need to marinate one. For example, you can't have a tuple as an
edge value. This is probably fine?

Stringification of Graphs was added to aid in debugging the decoder, the
format it outputs is not the final one. Most likely the (future) encoder
will be used for that purpose.

The decoder is not implemented in the nicest way; it fully reads in the
LexerTokens first, and then processes. This made trying to wrap my head
around the problem a lot easier because it left fewer failure cases, but
it's not the most efficient thing to do.

Now that v0 is done it's pretty plain to see that the decoder could work
by only reading in the next N tokens that it needs at a time. But that
will be left for a future version.
2021-12-27 15:45:18 -07:00

319 lines
6.4 KiB
Go

package gg
import (
"errors"
"fmt"
"io"
"strconv"
)
// Punctuations which are used in the gg file format.
const (
punctTerm = ";"
punctOp = "<"
punctAssign = "="
punctOpenGraph = "{"
punctCloseGraph = "}"
punctOpenTuple = "("
punctCloseTuple = ")"
)
func decoderErr(tok LexerToken, err error) error {
return fmt.Errorf("%d:%d: %w", tok.Row, tok.Col, err)
}
func decoderErrf(tok LexerToken, str string, args ...interface{}) error {
return decoderErr(tok, fmt.Errorf(str, args...))
}
func isPunct(tok LexerToken, val string) bool {
return tok.Kind == LexerTokenKindPunctuation && tok.Value == val
}
func isTerm(tok LexerToken) bool {
return isPunct(tok, punctTerm)
}
// decoder is currently only really used to namespace functions related to
// decoding Graphs. It may later have actual fields added to it, such as for
// options passed by the caller.
type decoder struct{}
// returned boolean value indicates if the token following the single token is a
// term. If a term followed the first token then it is not included in the
// returned leftover tokens.
//
// if termed is false then leftover tokens cannot be empty.
func (d *decoder) parseSingleValue(
toks []LexerToken,
) (
Value, []LexerToken, bool, error,
) {
tok, rest := toks[0], toks[1:]
if len(rest) == 0 {
return Value{}, nil, false, decoderErrf(tok, "cannot be final token, possibly missing %q", punctTerm)
}
termed := isTerm(rest[0])
if termed {
rest = rest[1:]
}
switch tok.Kind {
case LexerTokenKindName:
return Value{Name: &tok.Value}, rest, termed, nil
case LexerTokenKindNumber:
i, err := strconv.ParseInt(tok.Value, 10, 64)
if err != nil {
return Value{}, nil, false, decoderErrf(tok, "parsing %q as integer: %w", tok.Value, err)
}
return Value{Number: &i}, rest, termed, nil
case LexerTokenKindPunctuation:
return Value{}, nil, false, decoderErrf(tok, "expected value, found punctuation %q", tok.Value)
default:
panic(fmt.Sprintf("unexpected token kind %q", tok.Kind))
}
}
func (d *decoder) parseOpenEdge(
toks []LexerToken,
) (
OpenEdge, []LexerToken, error,
) {
if isPunct(toks[0], punctOpenTuple) {
return d.parseTuple(toks)
}
var (
val Value
termed bool
err error
)
switch {
case isPunct(toks[0], punctOpenGraph):
val, toks, termed, err = d.parseGraphValue(toks, true)
default:
val, toks, termed, err = d.parseSingleValue(toks)
}
if err != nil {
return OpenEdge{}, nil, err
}
if termed {
return ValueOut(val, Value{}), toks, nil
}
opTok, toks := toks[0], toks[1:]
if !isPunct(opTok, punctOp) {
return OpenEdge{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
}
if len(toks) == 0 {
return OpenEdge{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
}
oe, toks, err := d.parseOpenEdge(toks)
if err != nil {
return OpenEdge{}, nil, err
}
oe = TupleOut([]OpenEdge{oe}, val)
return oe, toks, nil
}
func (d *decoder) parseTuple(
toks []LexerToken,
) (
OpenEdge, []LexerToken, error,
) {
openTok, toks := toks[0], toks[1:]
var edges []OpenEdge
for {
if len(toks) == 0 {
return OpenEdge{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
} else if isPunct(toks[0], punctCloseTuple) {
toks = toks[1:]
break
}
var (
oe OpenEdge
err error
)
oe, toks, err = d.parseOpenEdge(toks)
if err != nil {
return OpenEdge{}, nil, err
}
edges = append(edges, oe)
}
// this is a quirk of the syntax, _technically_ a tuple doesn't need a
// term after it, since it can't be used as an edge value, and so
// nothing can come after it in the chain.
if len(toks) > 0 && isTerm(toks[0]) {
toks = toks[1:]
}
return TupleOut(edges, Value{}), toks, nil
}
// returned boolean value indicates if the token following the graph is a term.
// If a term followed the first token then it is not included in the returned
// leftover tokens.
//
// if termed is false then leftover tokens cannot be empty.
func (d *decoder) parseGraphValue(
toks []LexerToken, expectWrappers bool,
) (
Value, []LexerToken, bool, error,
) {
var openTok LexerToken
if expectWrappers {
openTok, toks = toks[0], toks[1:]
}
g := ZeroGraph
for {
if len(toks) == 0 {
if !expectWrappers {
break
}
return Value{}, nil, false, decoderErrf(openTok, "no matching %q", punctCloseGraph)
} else if closingTok := toks[0]; isPunct(closingTok, punctCloseGraph) {
if !expectWrappers {
return Value{}, nil, false, decoderErrf(closingTok, "unexpected %q", punctCloseGraph)
}
toks = toks[1:]
if len(toks) == 0 {
return Value{}, nil, false, decoderErrf(closingTok, "cannot be final token, possibly missing %q", punctTerm)
}
break
}
var err error
if g, toks, err = d.parseValIn(g, toks); err != nil {
return Value{}, nil, false, err
}
}
val := Value{Graph: g}
if !expectWrappers {
return val, toks, true, nil
}
termed := isTerm(toks[0])
if termed {
toks = toks[1:]
}
return val, toks, termed, nil
}
func (d *decoder) parseValIn(into *Graph, toks []LexerToken) (*Graph, []LexerToken, error) {
if len(toks) == 0 {
return into, nil, nil
} else if len(toks) < 3 {
return nil, nil, decoderErrf(toks[0], `must be of the form "<name> = ..."`)
}
dst := toks[0]
eq := toks[1]
toks = toks[2:]
if dst.Kind != LexerTokenKindName {
return nil, nil, decoderErrf(dst, "must be a name")
} else if !isPunct(eq, punctAssign) {
return nil, nil, decoderErrf(eq, "must be %q", punctAssign)
}
oe, toks, err := d.parseOpenEdge(toks)
if err != nil {
return nil, nil, err
}
dstVal := Value{Name: &dst.Value}
return into.AddValueIn(oe, dstVal), toks, nil
}
func (d *decoder) decode(lexer Lexer) (*Graph, error) {
var toks []LexerToken
for {
tok, err := lexer.Next()
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return nil, fmt.Errorf("reading next token: %w", err)
}
toks = append(toks, tok)
}
val, _, _, err := d.parseGraphValue(toks, false)
if err != nil {
return nil, err
}
return val.Graph, nil
}
// DecodeLexer reads lexigraphical tokens from the given Lexer and uses them to
// construct a Graph according to the rules of the gg file format. DecodeLexer
// will only return an error if there is a non-EOF file returned from the Lexer,
// or the tokens read cannot be used to construct a valid Graph.
func DecodeLexer(lexer Lexer) (*Graph, error) {
decoder := &decoder{}
return decoder.decode(lexer)
}