Refactor Graph internals to deduplicate OpenEdges

This change required OpenEdges to be passed around as pointers, which in turn required me to audit how value copying is being done everywhere and simplify it in a few places. I think I covered all the bases. The new internals of Graph allow the graph's actual structure to be reflected within the graph itself. For example, if the OpenEdges of two ValueIns are equivalent then the same OpenEdge pointer is shared for the two internally. This applies recursively, so if two OpenEdge tuples share an inner OpenEdge, they will share the same pointer. This change is a preliminary requirement for creating a graph mapping operation. Without OpenEdge deduplication the map operation would end up operating on the same OpenEdge multiple times, which would be incorrect.
2021-12-29 13:57:14 -07:00 · 2021-12-29 13:57:14 -07:00 · c4dd673bf4
commit c4dd673bf4
parent 181c392219
7 changed files with 118 additions and 113 deletions
--- a/gg/decoder.go
+++ b/gg/decoder.go
@ -90,7 +90,7 @@ func (d *decoder) parseSingleValue(
 func (d *decoder) parseOpenEdge(
 	toks []LexerToken,
 ) (
-	graph.OpenEdge[Value], []LexerToken, error,
+	*graph.OpenEdge[Value], []LexerToken, error,
 ) {

 	if isPunct(toks[0], punctOpenTuple) {
@ -113,7 +113,7 @@ func (d *decoder) parseOpenEdge(
 	}

 	if err != nil {
-		return graph.OpenEdge[Value]{}, nil, err
+		return nil, nil, err

 	}

@ -124,20 +124,20 @@ func (d *decoder) parseOpenEdge(
 	opTok, toks := toks[0], toks[1:]

 	if !isPunct(opTok, punctOp) {
-		return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
+		return nil, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
 	}

 	if len(toks) == 0 {
-		return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
+		return nil, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
 	}

 	oe, toks, err := d.parseOpenEdge(toks)

 	if err != nil {
-		return graph.OpenEdge[Value]{}, nil, err
+		return nil, nil, err
 	}

-	oe = graph.TupleOut[Value]([]graph.OpenEdge[Value]{oe}, val)
+	oe = graph.TupleOut[Value]([]*graph.OpenEdge[Value]{oe}, val)

 	return oe, toks, nil
 }
@ -145,17 +145,17 @@ func (d *decoder) parseOpenEdge(
 func (d *decoder) parseTuple(
 	toks []LexerToken,
 ) (
-	graph.OpenEdge[Value], []LexerToken, error,
+	*graph.OpenEdge[Value], []LexerToken, error,
 ) {

 	openTok, toks := toks[0], toks[1:]

-	var edges []graph.OpenEdge[Value]
+	var edges []*graph.OpenEdge[Value]

 	for {

 		if len(toks) == 0 {
-			return graph.OpenEdge[Value]{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
+			return nil, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)

 		} else if isPunct(toks[0], punctCloseTuple) {
 			toks = toks[1:]
@ -163,14 +163,14 @@ func (d *decoder) parseTuple(
 		}

 		var (
-			oe  graph.OpenEdge[Value]
+			oe  *graph.OpenEdge[Value]
 			err error
 		)

 		oe, toks, err = d.parseOpenEdge(toks)

 		if err != nil {
-			return graph.OpenEdge[Value]{}, nil, err
+			return nil, nil, err
 		}

 		edges = append(edges, oe)
--- a/gg/decoder_test.go
+++ b/gg/decoder_test.go
@ -21,15 +21,15 @@ func TestDecoder(t *testing.T) {
 		return Value{Name: &n}
 	}

-	vOut := func(val, edgeVal Value) graph.OpenEdge[Value] {
+	vOut := func(val, edgeVal Value) *graph.OpenEdge[Value] {
 		return graph.ValueOut(val, edgeVal)
 	}

-	tOut := func(ins []graph.OpenEdge[Value], edgeVal Value) graph.OpenEdge[Value] {
+	tOut := func(ins []*graph.OpenEdge[Value], edgeVal Value) *graph.OpenEdge[Value] {
 		return graph.TupleOut(ins, edgeVal)
 	}

-	type openEdge = graph.OpenEdge[Value]
+	type openEdge = *graph.OpenEdge[Value]

 	tests := []struct {
 		in  string
--- a/gg/gg.go
+++ b/gg/gg.go
@ -1,4 +1,4 @@
-// Package gg implements ginger graph creation, traversal, and (de)serialization
+// Package gg implements graph serialization to/from the gg text format.
 package gg

 import (
@ -10,7 +10,7 @@ import (
 // ZeroValue is a Value with no fields set.
 var ZeroValue Value

-// Value represents a value being stored in a Graph.
+// Value represents a value which can be serialized by the gg text format.
 type Value struct {

 	// Only one of these fields may be set
--- a/graph/graph.go
+++ b/graph/graph.go
@ -7,7 +7,9 @@ import (
 	"strings"
 )

-// Value is any value which can be stored within a Graph.
+// Value is any value which can be stored within a Graph. Values should be
+// considered immutable, ie once used with the graph package their internal
+// value does not change.
 type Value interface {
 	Equal(Value) bool
 	String() string
@ -17,12 +19,12 @@ type Value interface {
 // constructing graphs. It has no meaning on its own.
 type OpenEdge[V Value] struct {
 	val *V
-	tup []OpenEdge[V]
+	tup []*OpenEdge[V]

 	edgeVal V
 }

-func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool {
+func (oe *OpenEdge[V]) equal(oe2 *OpenEdge[V]) bool {
 	if !oe.edgeVal.Equal(oe2.edgeVal) {
 		return false
 	}
@ -44,7 +46,7 @@ func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool {
 	return true
 }

-func (oe OpenEdge[V]) String() string {
+func (oe *OpenEdge[V]) String() string {

 	vertexType := "tup"

@ -73,9 +75,10 @@ func (oe OpenEdge[V]) String() string {
 // the previous edge value.
 //
 // NOTE I _think_ this can be factored out once Graph is genericized.
-func (oe OpenEdge[V]) WithEdgeValue(val V) OpenEdge[V] {
-	oe.edgeVal = val
-	return oe
+func (oe *OpenEdge[V]) WithEdgeValue(val V) *OpenEdge[V] {
+	oeCp := *oe
+	oeCp.edgeVal = val
+	return &oeCp
 }

 // EdgeValue returns the Value which lies on the edge itself.
@ -96,7 +99,7 @@ func (oe OpenEdge[V]) FromValue() (V, bool) {

 // FromTuple returns the tuple of OpenEdges from which the OpenEdge was created
 // via TupleOut, or false if it wasn't created via TupleOut.
-func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) {
+func (oe OpenEdge[V]) FromTuple() ([]*OpenEdge[V], bool) {
 	if oe.val != nil {
 		return nil, false
 	}
@ -105,37 +108,37 @@ func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) {
 }

 // ValueOut creates a OpenEdge which, when used to construct a Graph, represents
-// an edge (with edgeVal attached to it) coming from the ValueVertex containing
-// val.
-func ValueOut[V Value](val, edgeVal V) OpenEdge[V] {
-	return OpenEdge[V]{val: &val, edgeVal: edgeVal}
+// an edge (with edgeVal attached to it) coming from the vertex containing val.
+func ValueOut[V Value](val, edgeVal V) *OpenEdge[V] {
+	return &OpenEdge[V]{
+		val: &val,
+		edgeVal: edgeVal,
+	}
 }

 // TupleOut creates an OpenEdge which, when used to construct a Graph,
-// represents an edge (with edgeVal attached to it) coming from the
-// TupleVertex comprised of the given ordered-set of input edges.
-//
-// If len(ins) == 1 && edgeVal.IsZero(), then that single OpenEdge is
-// returned as-is.
-func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] {
+// represents an edge (with edgeVal attached to it) coming from the vertex
+// comprised of the given ordered-set of input edges.
+func TupleOut[V Value](ins []*OpenEdge[V], edgeVal V) *OpenEdge[V] {

 	if len(ins) == 1 {

-		in := ins[0]
-		var zero V
+		var (
+			zero V
+			in = ins[0]
+		)

 		if edgeVal.Equal(zero) {
 			return in
 		}

 		if in.edgeVal.Equal(zero) {
-			in.edgeVal = edgeVal
-			return in
+			return in.WithEdgeValue(edgeVal)
 		}

 	}

-	return OpenEdge[V]{
+	return &OpenEdge[V]{
 		tup: ins,
 		edgeVal: edgeVal,
 	}
@ -143,39 +146,11 @@ func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] {

 type graphValueIn[V Value] struct {
 	val   V
-	edges []OpenEdge[V]
-}
-
-func (valIn graphValueIn[V]) cp() graphValueIn[V] {
-	cp := valIn
-	cp.edges = make([]OpenEdge[V], len(valIn.edges))
-	copy(cp.edges, valIn.edges)
-	return valIn
+	edge *OpenEdge[V]
 }

 func (valIn graphValueIn[V]) equal(valIn2 graphValueIn[V]) bool {
-	if !valIn.val.Equal(valIn2.val) {
-		return false
-	}
-
-	if len(valIn.edges) != len(valIn2.edges) {
-		return false
-	}
-
-outer:
-	for _, edge := range valIn.edges {
-
-		for _, edge2 := range valIn2.edges {
-
-			if edge.equal(edge2) {
-				continue outer
-			}
-		}
-
-		return false
-	}
-
-	return true
+	return valIn.val.Equal(valIn2.val) && valIn.edge.equal(valIn2.edge)
 }

 // Graph is an immutable container of a set of vertices. The Graph keeps track
@ -186,13 +161,16 @@ outer:
 // lots of O(N) operations, unnecessary copying on changes, and duplicate data
 // in memory.
 type Graph[V Value] struct {
+	edges []*OpenEdge[V]
 	valIns []graphValueIn[V]
 }

 func (g *Graph[V]) cp() *Graph[V] {
 	cp := &Graph[V]{
+		edges: make([]*OpenEdge[V], len(g.edges)),
 		valIns: make([]graphValueIn[V], len(g.valIns)),
 	}
+	copy(cp.edges, g.edges)
 	copy(cp.valIns, g.valIns)
 	return cp
 }
@ -202,55 +180,82 @@ func (g *Graph[V]) String() string {
 	var strs []string

 	for _, valIn := range g.valIns {
-		for _, oe := range valIn.edges {
-			strs = append(
-				strs,
-				fmt.Sprintf("valIn(%s, %s)", oe.String(), valIn.val.String()),
-			)
-		}
+		strs = append(
+			strs,
+			fmt.Sprintf("valIn(%s, %s)", valIn.edge.String(), valIn.val.String()),
+		)
 	}

 	return fmt.Sprintf("graph(%s)", strings.Join(strs, ", "))
 }

-// ValueIns returns, if any, all OpenEdges which lead to the given Value in the
-// Graph (ie, all those added via AddValueIn).
-func (g *Graph[V]) ValueIns(val Value) []OpenEdge[V] {
-	for _, valIn := range g.valIns {
-		if valIn.val.Equal(val) {
-			return valIn.cp().edges
+// NOTE this method is used more for its functionality than for any performance
+// reasons... it's incredibly inefficient in how it deduplicates edges, but by
+// doing the deduplication we enable the graph map operation to work correctly.
+func (g *Graph[V]) dedupeEdge(edge *OpenEdge[V]) *OpenEdge[V] {
+
+	// check if there's an existing edge which is fully equivalent in the graph
+	// already, and if so return that.
+	for i := range g.edges {
+		if g.edges[i].equal(edge) {
+			return g.edges[i]
 		}
 	}

-	return nil
+	// if this edge is a value edge then there's nothing else to do, return it.
+	if _, ok := edge.FromValue(); ok {
+		return edge
+	}
+
+	// this edge is a tuple edge, it's possible that one of its sub-edges is
+	// already in the graph. dedupe each sub-edge individually.
+
+	tupEdges := make([]*OpenEdge[V], len(edge.tup))
+
+	for i := range edge.tup {
+		tupEdges[i] = g.dedupeEdge(edge.tup[i])
+	}
+
+	return TupleOut(tupEdges, edge.EdgeValue())
+}
+
+// ValueIns returns, if any, all OpenEdges which lead to the given Value in the
+// Graph (ie, all those added via AddValueIn).
+//
+// The returned slice should not be modified.
+func (g *Graph[V]) ValueIns(val Value) []*OpenEdge[V] {
+
+	var edges []*OpenEdge[V]
+
+	for _, valIn := range g.valIns {
+		if valIn.val.Equal(val) {
+			edges = append(edges, valIn.edge)
+		}
+	}
+
+	return edges
 }

 // AddValueIn takes a OpenEdge and connects it to the Value vertex containing
 // val, returning the new Graph which reflects that connection.
-func (g *Graph[V]) AddValueIn(oe OpenEdge[V], val V) *Graph[V] {
+func (g *Graph[V]) AddValueIn(oe *OpenEdge[V], val V) *Graph[V] {

-	edges := g.ValueIns(val)
+	valIn := graphValueIn[V]{
+		val: val,
+		edge: oe,
+	}

-	for _, existingOE := range edges {
-		if existingOE.equal(oe) {
+	for i := range g.valIns {
+		if g.valIns[i].equal(valIn) {
 			return g
 		}
 	}

-	// ValueIns returns a copy of edges, so we're ok to modify it.
-	edges = append(edges, oe)
-	valIn := graphValueIn[V]{val: val, edges: edges}
+	valIn.edge = g.dedupeEdge(valIn.edge)

 	g = g.cp()
-
-	for i, existingValIn := range g.valIns {
-		if existingValIn.val.Equal(val) {
-			g.valIns[i] = valIn
-			return g
-		}
-	}
-
 	g.valIns = append(g.valIns, valIn)
+
 	return g
 }

--- a/graph/graph_test.go
+++ b/graph/graph_test.go
@ -41,7 +41,7 @@ func TestEqual(t *testing.T) {
 		},
 		{
 			a: zeroGraph.AddValueIn(ValueOut[S]("in", "incr"), "out"),
-			b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{
+			b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
 				ValueOut[S]("in", "ident"),
 				ValueOut[S]("1", "ident"),
 			}, "add"), "out"),
@ -49,11 +49,11 @@ func TestEqual(t *testing.T) {
 		},
 		{
 			// tuples are different order
-			a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{
+			a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
 				ValueOut[S]("1", "ident"),
 				ValueOut[S]("in", "ident"),
 			}, "add"), "out"),
-			b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{
+			b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
 				ValueOut[S]("in", "ident"),
 				ValueOut[S]("1", "ident"),
 			}, "add"), "out"),
@ -62,7 +62,7 @@ func TestEqual(t *testing.T) {
 		{
 			// tuple with no edge value and just a single input edge should be
 			// equivalent to just that edge.
-			a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{
+			a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
 				ValueOut[S]("1", "ident"),
 			}, zeroValue), "out"),
 			b:   zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"),
@ -72,7 +72,7 @@ func TestEqual(t *testing.T) {
 			// tuple with an edge value and just a single input edge that has no
 			// edgeVal should be equivalent to just that edge with the tuple's
 			// edge value.
-			a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{
+			a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
 				ValueOut[S]("1", zeroValue),
 			}, "ident"), "out"),
 			b:   zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"),
--- a/vm/op.go
+++ b/vm/op.go
@ -17,12 +17,12 @@ var (
 // The Scope passed into Perform can be used to Evaluate the OpenEdge, as
 // needed.
 type Operation interface {
-	Perform(graph.OpenEdge[gg.Value], Scope) (Value, error)
+	Perform(*graph.OpenEdge[gg.Value], Scope) (Value, error)
 }

 func preEvalValOp(fn func(Value) (Value, error)) Operation {

-	return OperationFunc(func(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
+	return OperationFunc(func(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {

 		edgeVal, err := EvaluateEdge(edge, scope)

@ -41,15 +41,15 @@ func preEvalValOp(fn func(Value) (Value, error)) Operation {
 //
 // This also doesn't yet support passing an operation as a value to another
 // operation.
-func preEvalEdgeOp(fn func(graph.OpenEdge[gg.Value]) (Value, error)) Operation {
+func preEvalEdgeOp(fn func(*graph.OpenEdge[gg.Value]) (Value, error)) Operation {

 	return preEvalValOp(func(val Value) (Value, error) {

-		var edge graph.OpenEdge[gg.Value]
+		var edge *graph.OpenEdge[gg.Value]

 		if len(val.Tuple) > 0 {

-			tupEdges := make([]graph.OpenEdge[gg.Value], len(val.Tuple))
+			tupEdges := make([]*graph.OpenEdge[gg.Value], len(val.Tuple))

 			for i := range val.Tuple {
 				tupEdges[i] = graph.ValueOut[gg.Value](val.Tuple[i].Value, gg.ZeroValue)
@ -87,9 +87,9 @@ func OperationFromGraph(g *graph.Graph[gg.Value], scope Scope) Operation {
 	}
 }

-func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
+func (g *graphOp) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {

-	return preEvalEdgeOp(func(edge graph.OpenEdge[gg.Value]) (Value, error) {
+	return preEvalEdgeOp(func(edge *graph.OpenEdge[gg.Value]) (Value, error) {

 		scope = ScopeFromGraph(
 			g.Graph.AddValueIn(edge, inVal.Value),
@ -103,9 +103,9 @@ func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, er
 }

 // OperationFunc is a function which implements the Operation interface.
-type OperationFunc func(graph.OpenEdge[gg.Value], Scope) (Value, error)
+type OperationFunc func(*graph.OpenEdge[gg.Value], Scope) (Value, error)

 // Perform calls the underlying OperationFunc directly.
-func (f OperationFunc) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
+func (f OperationFunc) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
 	return f(edge, scope)
 }
--- a/vm/scope.go
+++ b/vm/scope.go
@ -23,7 +23,7 @@ type Scope interface {

 // edgeToValue ignores the edgeValue, it only evaluates the edge's vertex as a
 // Value.
-func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
+func edgeToValue(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {

 	if ggVal, ok := edge.FromValue(); ok {

@ -61,7 +61,7 @@ func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
 // EvaluateEdge will use the given Scope to evaluate the edge's ultimate Value,
 // after passing all leaf vertices up the tree through all Operations found on
 // edge values.
-func EvaluateEdge(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
+func EvaluateEdge(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {

 	edgeVal := Value{Value: edge.EdgeValue()}