From c4dd673bf4adf20f81c7e22654bf5bd4f9efc3b4 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 29 Dec 2021 13:57:14 -0700 Subject: [PATCH] Refactor Graph internals to deduplicate OpenEdges This change required OpenEdges to be passed around as pointers, which in turn required me to audit how value copying is being done everywhere and simplify it in a few places. I think I covered all the bases. The new internals of Graph allow the graph's actual structure to be reflected within the graph itself. For example, if the OpenEdges of two ValueIns are equivalent then the same OpenEdge pointer is shared for the two internally. This applies recursively, so if two OpenEdge tuples share an inner OpenEdge, they will share the same pointer. This change is a preliminary requirement for creating a graph mapping operation. Without OpenEdge deduplication the map operation would end up operating on the same OpenEdge multiple times, which would be incorrect. --- gg/decoder.go | 22 +++--- gg/decoder_test.go | 6 +- gg/gg.go | 4 +- graph/graph.go | 167 +++++++++++++++++++++++--------------------- graph/graph_test.go | 10 +-- vm/op.go | 18 ++--- vm/scope.go | 4 +- 7 files changed, 118 insertions(+), 113 deletions(-) diff --git a/gg/decoder.go b/gg/decoder.go index b803331..bdf574d 100644 --- a/gg/decoder.go +++ b/gg/decoder.go @@ -90,7 +90,7 @@ func (d *decoder) parseSingleValue( func (d *decoder) parseOpenEdge( toks []LexerToken, ) ( - graph.OpenEdge[Value], []LexerToken, error, + *graph.OpenEdge[Value], []LexerToken, error, ) { if isPunct(toks[0], punctOpenTuple) { @@ -113,7 +113,7 @@ func (d *decoder) parseOpenEdge( } if err != nil { - return graph.OpenEdge[Value]{}, nil, err + return nil, nil, err } @@ -124,20 +124,20 @@ func (d *decoder) parseOpenEdge( opTok, toks := toks[0], toks[1:] if !isPunct(opTok, punctOp) { - return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm) + return nil, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm) } if len(toks) == 0 { - return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp) + return nil, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp) } oe, toks, err := d.parseOpenEdge(toks) if err != nil { - return graph.OpenEdge[Value]{}, nil, err + return nil, nil, err } - oe = graph.TupleOut[Value]([]graph.OpenEdge[Value]{oe}, val) + oe = graph.TupleOut[Value]([]*graph.OpenEdge[Value]{oe}, val) return oe, toks, nil } @@ -145,17 +145,17 @@ func (d *decoder) parseOpenEdge( func (d *decoder) parseTuple( toks []LexerToken, ) ( - graph.OpenEdge[Value], []LexerToken, error, + *graph.OpenEdge[Value], []LexerToken, error, ) { openTok, toks := toks[0], toks[1:] - var edges []graph.OpenEdge[Value] + var edges []*graph.OpenEdge[Value] for { if len(toks) == 0 { - return graph.OpenEdge[Value]{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple) + return nil, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple) } else if isPunct(toks[0], punctCloseTuple) { toks = toks[1:] @@ -163,14 +163,14 @@ func (d *decoder) parseTuple( } var ( - oe graph.OpenEdge[Value] + oe *graph.OpenEdge[Value] err error ) oe, toks, err = d.parseOpenEdge(toks) if err != nil { - return graph.OpenEdge[Value]{}, nil, err + return nil, nil, err } edges = append(edges, oe) diff --git a/gg/decoder_test.go b/gg/decoder_test.go index e7290f1..a73a2ff 100644 --- a/gg/decoder_test.go +++ b/gg/decoder_test.go @@ -21,15 +21,15 @@ func TestDecoder(t *testing.T) { return Value{Name: &n} } - vOut := func(val, edgeVal Value) graph.OpenEdge[Value] { + vOut := func(val, edgeVal Value) *graph.OpenEdge[Value] { return graph.ValueOut(val, edgeVal) } - tOut := func(ins []graph.OpenEdge[Value], edgeVal Value) graph.OpenEdge[Value] { + tOut := func(ins []*graph.OpenEdge[Value], edgeVal Value) *graph.OpenEdge[Value] { return graph.TupleOut(ins, edgeVal) } - type openEdge = graph.OpenEdge[Value] + type openEdge = *graph.OpenEdge[Value] tests := []struct { in string diff --git a/gg/gg.go b/gg/gg.go index 35fbecf..5b1c35a 100644 --- a/gg/gg.go +++ b/gg/gg.go @@ -1,4 +1,4 @@ -// Package gg implements ginger graph creation, traversal, and (de)serialization +// Package gg implements graph serialization to/from the gg text format. package gg import ( @@ -10,7 +10,7 @@ import ( // ZeroValue is a Value with no fields set. var ZeroValue Value -// Value represents a value being stored in a Graph. +// Value represents a value which can be serialized by the gg text format. type Value struct { // Only one of these fields may be set diff --git a/graph/graph.go b/graph/graph.go index 22fd20b..34c95f6 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -7,7 +7,9 @@ import ( "strings" ) -// Value is any value which can be stored within a Graph. +// Value is any value which can be stored within a Graph. Values should be +// considered immutable, ie once used with the graph package their internal +// value does not change. type Value interface { Equal(Value) bool String() string @@ -17,12 +19,12 @@ type Value interface { // constructing graphs. It has no meaning on its own. type OpenEdge[V Value] struct { val *V - tup []OpenEdge[V] + tup []*OpenEdge[V] edgeVal V } -func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool { +func (oe *OpenEdge[V]) equal(oe2 *OpenEdge[V]) bool { if !oe.edgeVal.Equal(oe2.edgeVal) { return false } @@ -44,7 +46,7 @@ func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool { return true } -func (oe OpenEdge[V]) String() string { +func (oe *OpenEdge[V]) String() string { vertexType := "tup" @@ -73,9 +75,10 @@ func (oe OpenEdge[V]) String() string { // the previous edge value. // // NOTE I _think_ this can be factored out once Graph is genericized. -func (oe OpenEdge[V]) WithEdgeValue(val V) OpenEdge[V] { - oe.edgeVal = val - return oe +func (oe *OpenEdge[V]) WithEdgeValue(val V) *OpenEdge[V] { + oeCp := *oe + oeCp.edgeVal = val + return &oeCp } // EdgeValue returns the Value which lies on the edge itself. @@ -96,7 +99,7 @@ func (oe OpenEdge[V]) FromValue() (V, bool) { // FromTuple returns the tuple of OpenEdges from which the OpenEdge was created // via TupleOut, or false if it wasn't created via TupleOut. -func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) { +func (oe OpenEdge[V]) FromTuple() ([]*OpenEdge[V], bool) { if oe.val != nil { return nil, false } @@ -105,37 +108,37 @@ func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) { } // ValueOut creates a OpenEdge which, when used to construct a Graph, represents -// an edge (with edgeVal attached to it) coming from the ValueVertex containing -// val. -func ValueOut[V Value](val, edgeVal V) OpenEdge[V] { - return OpenEdge[V]{val: &val, edgeVal: edgeVal} +// an edge (with edgeVal attached to it) coming from the vertex containing val. +func ValueOut[V Value](val, edgeVal V) *OpenEdge[V] { + return &OpenEdge[V]{ + val: &val, + edgeVal: edgeVal, + } } // TupleOut creates an OpenEdge which, when used to construct a Graph, -// represents an edge (with edgeVal attached to it) coming from the -// TupleVertex comprised of the given ordered-set of input edges. -// -// If len(ins) == 1 && edgeVal.IsZero(), then that single OpenEdge is -// returned as-is. -func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] { +// represents an edge (with edgeVal attached to it) coming from the vertex +// comprised of the given ordered-set of input edges. +func TupleOut[V Value](ins []*OpenEdge[V], edgeVal V) *OpenEdge[V] { if len(ins) == 1 { - in := ins[0] - var zero V + var ( + zero V + in = ins[0] + ) if edgeVal.Equal(zero) { return in } if in.edgeVal.Equal(zero) { - in.edgeVal = edgeVal - return in + return in.WithEdgeValue(edgeVal) } } - return OpenEdge[V]{ + return &OpenEdge[V]{ tup: ins, edgeVal: edgeVal, } @@ -143,39 +146,11 @@ func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] { type graphValueIn[V Value] struct { val V - edges []OpenEdge[V] -} - -func (valIn graphValueIn[V]) cp() graphValueIn[V] { - cp := valIn - cp.edges = make([]OpenEdge[V], len(valIn.edges)) - copy(cp.edges, valIn.edges) - return valIn + edge *OpenEdge[V] } func (valIn graphValueIn[V]) equal(valIn2 graphValueIn[V]) bool { - if !valIn.val.Equal(valIn2.val) { - return false - } - - if len(valIn.edges) != len(valIn2.edges) { - return false - } - -outer: - for _, edge := range valIn.edges { - - for _, edge2 := range valIn2.edges { - - if edge.equal(edge2) { - continue outer - } - } - - return false - } - - return true + return valIn.val.Equal(valIn2.val) && valIn.edge.equal(valIn2.edge) } // Graph is an immutable container of a set of vertices. The Graph keeps track @@ -186,13 +161,16 @@ outer: // lots of O(N) operations, unnecessary copying on changes, and duplicate data // in memory. type Graph[V Value] struct { + edges []*OpenEdge[V] valIns []graphValueIn[V] } func (g *Graph[V]) cp() *Graph[V] { cp := &Graph[V]{ + edges: make([]*OpenEdge[V], len(g.edges)), valIns: make([]graphValueIn[V], len(g.valIns)), } + copy(cp.edges, g.edges) copy(cp.valIns, g.valIns) return cp } @@ -202,55 +180,82 @@ func (g *Graph[V]) String() string { var strs []string for _, valIn := range g.valIns { - for _, oe := range valIn.edges { - strs = append( - strs, - fmt.Sprintf("valIn(%s, %s)", oe.String(), valIn.val.String()), - ) - } + strs = append( + strs, + fmt.Sprintf("valIn(%s, %s)", valIn.edge.String(), valIn.val.String()), + ) } return fmt.Sprintf("graph(%s)", strings.Join(strs, ", ")) } -// ValueIns returns, if any, all OpenEdges which lead to the given Value in the -// Graph (ie, all those added via AddValueIn). -func (g *Graph[V]) ValueIns(val Value) []OpenEdge[V] { - for _, valIn := range g.valIns { - if valIn.val.Equal(val) { - return valIn.cp().edges +// NOTE this method is used more for its functionality than for any performance +// reasons... it's incredibly inefficient in how it deduplicates edges, but by +// doing the deduplication we enable the graph map operation to work correctly. +func (g *Graph[V]) dedupeEdge(edge *OpenEdge[V]) *OpenEdge[V] { + + // check if there's an existing edge which is fully equivalent in the graph + // already, and if so return that. + for i := range g.edges { + if g.edges[i].equal(edge) { + return g.edges[i] } } - return nil + // if this edge is a value edge then there's nothing else to do, return it. + if _, ok := edge.FromValue(); ok { + return edge + } + + // this edge is a tuple edge, it's possible that one of its sub-edges is + // already in the graph. dedupe each sub-edge individually. + + tupEdges := make([]*OpenEdge[V], len(edge.tup)) + + for i := range edge.tup { + tupEdges[i] = g.dedupeEdge(edge.tup[i]) + } + + return TupleOut(tupEdges, edge.EdgeValue()) +} + +// ValueIns returns, if any, all OpenEdges which lead to the given Value in the +// Graph (ie, all those added via AddValueIn). +// +// The returned slice should not be modified. +func (g *Graph[V]) ValueIns(val Value) []*OpenEdge[V] { + + var edges []*OpenEdge[V] + + for _, valIn := range g.valIns { + if valIn.val.Equal(val) { + edges = append(edges, valIn.edge) + } + } + + return edges } // AddValueIn takes a OpenEdge and connects it to the Value vertex containing // val, returning the new Graph which reflects that connection. -func (g *Graph[V]) AddValueIn(oe OpenEdge[V], val V) *Graph[V] { +func (g *Graph[V]) AddValueIn(oe *OpenEdge[V], val V) *Graph[V] { - edges := g.ValueIns(val) + valIn := graphValueIn[V]{ + val: val, + edge: oe, + } - for _, existingOE := range edges { - if existingOE.equal(oe) { + for i := range g.valIns { + if g.valIns[i].equal(valIn) { return g } } - // ValueIns returns a copy of edges, so we're ok to modify it. - edges = append(edges, oe) - valIn := graphValueIn[V]{val: val, edges: edges} + valIn.edge = g.dedupeEdge(valIn.edge) g = g.cp() - - for i, existingValIn := range g.valIns { - if existingValIn.val.Equal(val) { - g.valIns[i] = valIn - return g - } - } - g.valIns = append(g.valIns, valIn) + return g } diff --git a/graph/graph_test.go b/graph/graph_test.go index 1adc502..a5d09f3 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -41,7 +41,7 @@ func TestEqual(t *testing.T) { }, { a: zeroGraph.AddValueIn(ValueOut[S]("in", "incr"), "out"), - b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ + b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{ ValueOut[S]("in", "ident"), ValueOut[S]("1", "ident"), }, "add"), "out"), @@ -49,11 +49,11 @@ func TestEqual(t *testing.T) { }, { // tuples are different order - a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ + a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{ ValueOut[S]("1", "ident"), ValueOut[S]("in", "ident"), }, "add"), "out"), - b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ + b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{ ValueOut[S]("in", "ident"), ValueOut[S]("1", "ident"), }, "add"), "out"), @@ -62,7 +62,7 @@ func TestEqual(t *testing.T) { { // tuple with no edge value and just a single input edge should be // equivalent to just that edge. - a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ + a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{ ValueOut[S]("1", "ident"), }, zeroValue), "out"), b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"), @@ -72,7 +72,7 @@ func TestEqual(t *testing.T) { // tuple with an edge value and just a single input edge that has no // edgeVal should be equivalent to just that edge with the tuple's // edge value. - a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ + a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{ ValueOut[S]("1", zeroValue), }, "ident"), "out"), b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"), diff --git a/vm/op.go b/vm/op.go index 8cc61c2..ec3401c 100644 --- a/vm/op.go +++ b/vm/op.go @@ -17,12 +17,12 @@ var ( // The Scope passed into Perform can be used to Evaluate the OpenEdge, as // needed. type Operation interface { - Perform(graph.OpenEdge[gg.Value], Scope) (Value, error) + Perform(*graph.OpenEdge[gg.Value], Scope) (Value, error) } func preEvalValOp(fn func(Value) (Value, error)) Operation { - return OperationFunc(func(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { + return OperationFunc(func(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) { edgeVal, err := EvaluateEdge(edge, scope) @@ -41,15 +41,15 @@ func preEvalValOp(fn func(Value) (Value, error)) Operation { // // This also doesn't yet support passing an operation as a value to another // operation. -func preEvalEdgeOp(fn func(graph.OpenEdge[gg.Value]) (Value, error)) Operation { +func preEvalEdgeOp(fn func(*graph.OpenEdge[gg.Value]) (Value, error)) Operation { return preEvalValOp(func(val Value) (Value, error) { - var edge graph.OpenEdge[gg.Value] + var edge *graph.OpenEdge[gg.Value] if len(val.Tuple) > 0 { - tupEdges := make([]graph.OpenEdge[gg.Value], len(val.Tuple)) + tupEdges := make([]*graph.OpenEdge[gg.Value], len(val.Tuple)) for i := range val.Tuple { tupEdges[i] = graph.ValueOut[gg.Value](val.Tuple[i].Value, gg.ZeroValue) @@ -87,9 +87,9 @@ func OperationFromGraph(g *graph.Graph[gg.Value], scope Scope) Operation { } } -func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { +func (g *graphOp) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) { - return preEvalEdgeOp(func(edge graph.OpenEdge[gg.Value]) (Value, error) { + return preEvalEdgeOp(func(edge *graph.OpenEdge[gg.Value]) (Value, error) { scope = ScopeFromGraph( g.Graph.AddValueIn(edge, inVal.Value), @@ -103,9 +103,9 @@ func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, er } // OperationFunc is a function which implements the Operation interface. -type OperationFunc func(graph.OpenEdge[gg.Value], Scope) (Value, error) +type OperationFunc func(*graph.OpenEdge[gg.Value], Scope) (Value, error) // Perform calls the underlying OperationFunc directly. -func (f OperationFunc) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { +func (f OperationFunc) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) { return f(edge, scope) } diff --git a/vm/scope.go b/vm/scope.go index f2f5f34..dbe1949 100644 --- a/vm/scope.go +++ b/vm/scope.go @@ -23,7 +23,7 @@ type Scope interface { // edgeToValue ignores the edgeValue, it only evaluates the edge's vertex as a // Value. -func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { +func edgeToValue(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) { if ggVal, ok := edge.FromValue(); ok { @@ -61,7 +61,7 @@ func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { // EvaluateEdge will use the given Scope to evaluate the edge's ultimate Value, // after passing all leaf vertices up the tree through all Operations found on // edge values. -func EvaluateEdge(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { +func EvaluateEdge(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) { edgeVal := Value{Value: edge.EdgeValue()}