From e113e96f1fe6ffbe64d5bccce2a42b00b07517fd Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Sat, 21 Oct 2023 17:42:31 +0200 Subject: [PATCH] WIP --- gg/v2/decoder.go | 73 +++++++++ gg/v2/gg.bnf | 24 +++ gg/v2/gg.go | 116 ++++++++++++++ gg/v2/location.go | 49 ++++++ gg/v2/term.go | 365 +++++++++++++++++++++++++++++++++++++++++++++ gg/v2/term_test.go | 220 +++++++++++++++++++++++++++ 6 files changed, 847 insertions(+) create mode 100644 gg/v2/decoder.go create mode 100644 gg/v2/gg.bnf create mode 100644 gg/v2/gg.go create mode 100644 gg/v2/location.go create mode 100644 gg/v2/term.go create mode 100644 gg/v2/term_test.go diff --git a/gg/v2/decoder.go b/gg/v2/decoder.go new file mode 100644 index 0000000..602414b --- /dev/null +++ b/gg/v2/decoder.go @@ -0,0 +1,73 @@ +package gg + +import ( + "bufio" + "fmt" + "io" +) + +// Decoder reads Values off of a byte stream. +type Decoder struct { + br *bufio.Reader + brNextLoc Location + + unread []locatableRune + lastRead locatableRune +} + +// NewDecoder returns a Decoder which will decode the given stream as a gg +// formatted stream of Values. 
+func NewDecoder(r io.Reader) *Decoder { + return &Decoder{ + br: bufio.NewReader(r), + brNextLoc: Location{Row: 1, Col: 1}, + } +} + +func (d *Decoder) readRune() (locatableRune, error) { + if len(d.unread) > 0 { + d.lastRead = d.unread[len(d.unread)-1] + d.unread = d.unread[:len(d.unread)-1] + return d.lastRead, nil + } + + loc := d.brNextLoc + + r, _, err := d.br.ReadRune() + if err != nil { + return d.lastRead, err + } + + if r == '\n' { + d.brNextLoc.Row++ + d.brNextLoc.Col = 1 + } else { + d.brNextLoc.Col++ + } + + d.lastRead = locatableRune{loc, r} + return d.lastRead, nil +} + +func (d *Decoder) unreadRune(lr locatableRune) { + if d.lastRead != lr { + panic(fmt.Sprintf( + "unreading rune %#v, but last read rune was %#v", lr, d.lastRead, + )) + } + + d.unread = append(d.unread, lr) +} + +func (d *Decoder) nextLoc() Location { + if len(d.unread) > 0 { + return d.unread[len(d.unread)-1].Location + } + + return d.brNextLoc +} + +// Next returns the next top-level value in the stream, or io.EOF. +func (d *Decoder) Next() (Value, error) { + panic("TODO") +} diff --git a/gg/v2/gg.bnf b/gg/v2/gg.bnf new file mode 100644 index 0000000..a6d262b --- /dev/null +++ b/gg/v2/gg.bnf @@ -0,0 +1,24 @@ + ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + ::= + + ::= "-" + ::= | + + ::= ( | ) ( | | )* + + ::= | | | + + ::= "(" + ::= ")" | + ::= ")" + | "," + | "<" + + ::= "{" + ::= "}" | "=" + ::= + ::= "}" + | ";" + | "<" + + ::= | | + ::= | diff --git a/gg/v2/gg.go b/gg/v2/gg.go new file mode 100644 index 0000000..c57d9f9 --- /dev/null +++ b/gg/v2/gg.go @@ -0,0 +1,116 @@ +// Package gg implements graph serialization to/from the gg text format. +package gg + +import ( + "fmt" + + "github.com/mediocregopher/ginger/graph" +) + +// Type aliases for convenience +type ( + Graph = graph.Graph[OptionalValue, Value] + OpenEdge = graph.OpenEdge[OptionalValue, Value] +) + +// Value represents a value which can be serialized by the gg text format. 
+type Value struct { + Location + + // Only one of these fields may be set + Name *string + Number *int64 + Graph *Graph +} + +// Name returns a name Value. +func Name(name string) Value { + return Value{Name: &name} +} + +// Number returns a number Value. +func Number(n int64) Value { + return Value{Number: &n} +} + +// Equal returns true if the passed in Value is equivalent, ignoring the +// Location on either Value. +// +// Will panic if the passed in v2g is not a Value from this package. +func (v Value) Equal(v2g graph.Value) bool { + + v2 := v2g.(Value) + + switch { + + case v.Name != nil && v2.Name != nil && *v.Name == *v2.Name: + return true + + case v.Number != nil && v2.Number != nil && *v.Number == *v2.Number: + return true + + case v.Graph != nil && v2.Graph != nil && v.Graph.Equal(v2.Graph): + return true + + default: + return false + } +} + +func (v Value) String() string { + + switch { + + case v.Name != nil: + return *v.Name + + case v.Number != nil: + return fmt.Sprint(*v.Number) + + case v.Graph != nil: + return v.Graph.String() + + default: + panic("no fields set on Value") + } +} + +// OptionalValue is a Value which may be unset. This is used for edge values, +// since edges might not have a value. +type OptionalValue struct { + Value + Valid bool +} + +// None is the zero OptionalValue (hello rustaceans). +var None OptionalValue + +// Some wraps a Value to be an OptionalValue. 
+func Some(v Value) OptionalValue { + return OptionalValue{Valid: true, Value: v} +} + +func (v OptionalValue) String() string { + if !v.Valid { + return "" + } + return v.Value.String() +} + +func (v OptionalValue) Equal(v2g graph.Value) bool { + var v2 OptionalValue + + if v2Val, ok := v2g.(Value); ok { + v2 = Some(v2Val) + } else { + v2 = v2g.(OptionalValue) + } + + if v.Valid != v2.Valid { + return false + } else if !v.Valid { + return true + } + + return v.Value.Equal(v2.Value) +} diff --git a/gg/v2/location.go b/gg/v2/location.go new file mode 100644 index 0000000..73a0047 --- /dev/null +++ b/gg/v2/location.go @@ -0,0 +1,49 @@ +package gg + +import "fmt" + +// Location indicates a position in a stream of bytes identified by column +// within newline-separated rows. +type Location struct { + Row, Col int +} + +func (l Location) errf(str string, args ...any) LocatedError { + return LocatedError{l, fmt.Errorf(str, args...)} +} + +func (l Location) locate() Location { return l } + +// LocatedError is an error related to a specific point within a decoded gg +// stream. 
+type LocatedError struct { + Location + Err error +} + +func (e LocatedError) Error() string { + return fmt.Sprintf("%d:%d: %v", e.Row, e.Col, e.Err) +} + +type locatable interface { + locate() Location +} + +type locatableRune struct { + Location + r rune +} + +type locatableString struct { + Location + str string +} + +type locatableSlice[T locatable] []T + +func (s locatableSlice[T]) locate() Location { + if len(s) == 0 { + panic("can't locate empty locatableSlice") + } + return s[0].locate() +} diff --git a/gg/v2/term.go b/gg/v2/term.go new file mode 100644 index 0000000..675e024 --- /dev/null +++ b/gg/v2/term.go @@ -0,0 +1,365 @@ +package gg + +import ( + "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + + "github.com/mediocregopher/ginger/graph" +) + +var ( + errNoMatch = errors.New("not found") +) + +type stringerFn func() string + +func (fn stringerFn) String() string { + return fn() +} + +type stringerStr string + +func (str stringerStr) String() string { + return string(str) +} + +type term[T locatable] struct { + name fmt.Stringer + decodeFn func(d *Decoder) (T, error) +} + +func (t term[T]) String() string { + return t.name.String() +} + +func firstOf[T locatable](terms ...*term[T]) *term[T] { + if len(terms) < 2 { + panic("firstOf requires at least 2 terms") + } + + return &term[T]{ + name: stringerFn(func() string { + descrs := make([]string, len(terms)) + for i := range terms { + descrs[i] = terms[i].String() + } + return strings.Join(descrs, " or ") + }), + decodeFn: func(d *Decoder) (T, error) { + var zero T + for _, t := range terms { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + continue + } else if err != nil { + return zero, err + } + + return v, nil + } + + return zero, errNoMatch + }, + } +} + +func seq[Ta, Tb, Tc locatable]( + name fmt.Stringer, + termA *term[Ta], + termB *term[Tb], + fn func(Ta, Tb) (Tc, error), +) *term[Tc] { + return &term[Tc]{ + name: name, + decodeFn: func(d *Decoder) (Tc, error) { + 
var zero Tc + + va, err := termA.decodeFn(d) + if err != nil { + return zero, err + } + + vb, err := termB.decodeFn(d) + if errors.Is(err, errNoMatch) { + return zero, d.nextLoc().errf("expected %v", termB) + } else if err != nil { + return zero, err + } + + vc, err := fn(va, vb) + if err != nil { + return zero, err + } + + return vc, nil + }, + } +} + +func matchAndSkip[Ta, Tb locatable]( + termA *term[Ta], termB *term[Tb], +) *term[Tb] { + return seq(termA, termA, termB, func(_ Ta, b Tb) (Tb, error) { + return b, nil + }) +} + +func oneOrMore[T locatable](t *term[T]) *term[locatableSlice[T]] { + return &term[locatableSlice[T]]{ + name: stringerFn(func() string { + return fmt.Sprintf("one or more %v", t) + }), + decodeFn: func(d *Decoder) (locatableSlice[T], error) { + var vv []T + for { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + break + } else if err != nil { + return nil, err + } + + vv = append(vv, v) + } + + if len(vv) == 0 { + return nil, errNoMatch + } + + return vv, nil + }, + } +} + +func zeroOrMore[T locatable](t *term[T]) *term[locatableSlice[T]] { + return &term[locatableSlice[T]]{ + name: stringerFn(func() string { + return fmt.Sprintf("zero or more %v", t) + }), + decodeFn: func(d *Decoder) (locatableSlice[T], error) { + var vv []T + for { + v, err := t.decodeFn(d) + if errors.Is(err, errNoMatch) { + break + } else if err != nil { + return nil, err + } + + vv = append(vv, v) + } + + return vv, nil + }, + } +} + +func mapTerm[Ta locatable, Tb locatable]( + name fmt.Stringer, t *term[Ta], fn func(Ta) Tb, +) *term[Tb] { + return &term[Tb]{ + name: name, + decodeFn: func(d *Decoder) (Tb, error) { + var zero Tb + va, err := t.decodeFn(d) + if err != nil { + return zero, err + } + return fn(va), nil + }, + } +} + +func runePredTerm( + name fmt.Stringer, pred func(rune) bool, +) *term[locatableRune] { + return &term[locatableRune]{ + name: name, + decodeFn: func(d *Decoder) (locatableRune, error) { + lr, err := d.readRune() + if 
errors.Is(err, io.EOF) { + return locatableRune{}, errNoMatch + } else if err != nil { + return locatableRune{}, err + } + + if !pred(lr.r) { + d.unreadRune(lr) + return locatableRune{}, errNoMatch + } + + return lr, nil + }, + } +} + +func runeTerm(r rune) *term[locatableRune] { + return runePredTerm( + stringerStr(fmt.Sprintf("'%c'", r)), + func(r2 rune) bool { return r2 == r }, + ) +} + +func locatableRunesToString(rr locatableSlice[locatableRune]) string { + str := make([]rune, len(rr)) + for i := range rr { + str[i] = rr[i].r + } + return string(str) +} + +func runesToStringTerm( + t *term[locatableSlice[locatableRune]], +) *term[locatableString] { + return mapTerm( + t, t, func(rr locatableSlice[locatableRune]) locatableString { + return locatableString{rr.locate(), locatableRunesToString(rr)} + }, + ) +} + +var ( + digitTerm = runePredTerm( + stringerStr("digit"), + func(r rune) bool { return '0' <= r && r <= '9' }, + ) + + positiveNumberTerm = runesToStringTerm(oneOrMore(digitTerm)) + + negativeNumberTerm = seq( + stringerStr("negative-number"), + runeTerm('-'), + positiveNumberTerm, + func(neg locatableRune, posNum locatableString) (locatableString, error) { + return locatableString{ + neg.locate(), string(neg.r) + posNum.str, + }, nil + }, + ) + + numberTerm = mapTerm( + stringerStr("number"), + firstOf(negativeNumberTerm, positiveNumberTerm), + func(str locatableString) Value { + i, err := strconv.ParseInt(str.str, 10, 64) + if err != nil { + panic(fmt.Errorf("parsing %q as int: %w", str.str, err)) + } + + return Value{Number: &i, Location: str.locate()} + }, + ) +) + +var ( + letterTerm = runePredTerm( + stringerStr("letter"), + func(r rune) bool { + return unicode.In(r, unicode.Letter, unicode.Mark) + }, + ) + + letterTailTerm = zeroOrMore(firstOf(letterTerm, digitTerm)) + + nameTerm = seq( + stringerStr("name"), + letterTerm, + letterTailTerm, + func(head locatableRune, tail locatableSlice[locatableRune]) (Value, error) { + name := string(head.r) + 
locatableRunesToString(tail) + return Value{Name: &name, Location: head.locate()}, nil + }, + ) +) + +var graphTerm = func() *term[Value] { + type graphState struct { + Location // location of last place graphState was updated + g *Graph + oe *OpenEdge + } + + var ( + // pre-define these, and then fill in the pointers after, in order to + // deal with recursive dependencies between them. + graphTerm = new(term[Value]) + graphTailTerm = new(term[graphState]) + graphOpenEdgeTerm = new(term[graphState]) + graphOpenEdgeTailTerm = new(term[graphState]) + valueTerm = new(term[Value]) + + rightCurlyBrace = runeTerm('}') + graphEndTerm = mapTerm( + rightCurlyBrace, + rightCurlyBrace, func(lr locatableRune) graphState { + // if '}', then map that to an empty state. This acts as a + // sentinel value to indicate "end of graph". + return graphState{Location: lr.locate()} + }, + ) + ) + + *graphTerm = *seq( + stringerStr("graph"), + runeTerm('{'), + graphTailTerm, + func(lr locatableRune, gs graphState) (Value, error) { + if gs.g == nil { + gs.g = new(Graph) + } + + return Value{Graph: gs.g, Location: lr.locate()}, nil + }, + ) + + *graphTailTerm = *firstOf( + graphEndTerm, + seq( + nameTerm, + nameTerm, + matchAndSkip(runeTerm('='), graphOpenEdgeTailTerm), + func(name Value, gs graphState) (graphState, error) { + if gs.g == nil { + gs.g = new(Graph) + } + + gs.g = gs.g.AddValueIn(name, gs.oe) + gs.oe = nil + gs.Location = name.locate() + return gs, nil + }, + ), + ) + + *graphOpenEdgeTerm = *firstOf( + graphEndTerm, + matchAndSkip(runeTerm(';'), graphTailTerm), + matchAndSkip(runeTerm('<'), graphOpenEdgeTailTerm), + ) + + *graphOpenEdgeTailTerm = *seq( + valueTerm, + valueTerm, + graphOpenEdgeTerm, + func(val Value, gs graphState) (graphState, error) { + if gs.oe == nil { + gs.oe = graph.ValueOut(None, val) + } else if !gs.oe.EdgeValue().Valid { + gs.oe = gs.oe.WithEdgeValue(Some(val)) + } else { + gs.oe = graph.TupleOut(Some(val), gs.oe) + } + + gs.Location = val.locate() + 
return gs, nil + }, + ) + + *valueTerm = *firstOf(nameTerm, numberTerm, graphTerm) + + return graphTerm +}() diff --git a/gg/v2/term_test.go b/gg/v2/term_test.go new file mode 100644 index 0000000..00e76c5 --- /dev/null +++ b/gg/v2/term_test.go @@ -0,0 +1,220 @@ +package gg + +import ( + "bytes" + "io" + "strconv" + "testing" + + "github.com/mediocregopher/ginger/graph" + "github.com/stretchr/testify/assert" +) + +func decoderLeftover(d *Decoder) string { + unread := make([]rune, len(d.unread)) + for i := range unread { + unread[i] = d.unread[i].r + } + + rest, err := io.ReadAll(d.br) + if err != nil { + panic(err) + } + return string(unread) + string(rest) +} + +func TestTermDecoding(t *testing.T) { + type test struct { + in string + exp Value + expErr string + leftover string + } + + runTests := func( + t *testing.T, name string, term *term[Value], tests []test, + ) { + t.Run(name, func(t *testing.T) { + for i, test := range tests { + t.Run(strconv.Itoa(i), func(t *testing.T) { + dec := NewDecoder(bytes.NewBufferString(test.in)) + got, err := term.decodeFn(dec) + if test.expErr != "" { + assert.Error(t, err) + assert.Equal(t, test.expErr, err.Error()) + } else if assert.NoError(t, err) { + assert.True(t, + test.exp.Equal(got), + "\nexp:%v\ngot:%v", test.exp, got, + ) + assert.Equal(t, test.leftover, decoderLeftover(dec)) + } + }) + } + }) + } + + expNum := func(row, col int, n int64) Value { + return Value{Number: &n, Location: Location{row, col}} + } + + runTests(t, "number", numberTerm, []test{ + {in: `0`, exp: expNum(1, 1, 0)}, + {in: `100`, exp: expNum(1, 1, 100)}, + {in: `-100`, exp: expNum(1, 1, -100)}, + {in: `0foo`, exp: expNum(1, 1, 0), leftover: "foo"}, + {in: `100foo`, exp: expNum(1, 1, 100), leftover: "foo"}, + }) + + expName := func(row, col int, name string) Value { + return Value{Name: &name, Location: Location{row, col}} + } + + expGraph := func(row, col int, g *Graph) Value { + return Value{Graph: g, Location: Location{row, col}} + } + + 
runTests(t, "name", nameTerm, []test{ + {in: `a`, exp: expName(1, 1, "a")}, + {in: `ab`, exp: expName(1, 1, "ab")}, + {in: `ab2c`, exp: expName(1, 1, "ab2c")}, + }) + + runTests(t, "graph", graphTerm, []test{ + {in: `{}`, exp: expGraph(1, 1, new(Graph))}, + {in: `{`, expErr: `1:2: expected '}' or name`}, + {in: `{a}`, expErr: `1:3: expected '='`}, + {in: `{a=}`, expErr: `1:4: expected name or number or graph`}, + { + in: `{foo=a}`, + exp: expGraph( + 1, 1, new(Graph). + AddValueIn( + expName(2, 1, "foo"), + graph.ValueOut(None, expName(6, 1, "a")), + ), + ), + }, + {in: `{1=a}`, expErr: `1:2: expected '}' or name`}, + {in: `{foo=a,}`, expErr: `1:7: expected '}' or ';' or '<'`}, + {in: `{foo=a`, expErr: `1:7: expected '}' or ';' or '<'`}, + { + in: `{foo=a