Refactor Graph internals to deduplicate OpenEdges

This change required OpenEdges to be passed around as pointers, which in
turn required me to audit how value copying is being done everywhere and
simplify it in a few places. I think I covered all the bases.

The new internals of Graph allow the graph's actual structure to be
reflected within the graph itself. For example, if the OpenEdges of two
ValueIns are equivalent then the same OpenEdge pointer is shared for the
two internally. This applies recursively, so if two OpenEdge tuples
share an inner OpenEdge, they will share the same pointer.

This change is a preliminary requirement for creating a graph mapping
operation. Without OpenEdge deduplication the map operation would end up
operating on the same OpenEdge multiple times, which would be incorrect.
This commit is contained in:
Brian Picciano 2021-12-29 13:57:14 -07:00
parent 181c392219
commit c4dd673bf4
7 changed files with 118 additions and 113 deletions

View File

@ -90,7 +90,7 @@ func (d *decoder) parseSingleValue(
func (d *decoder) parseOpenEdge( func (d *decoder) parseOpenEdge(
toks []LexerToken, toks []LexerToken,
) ( ) (
graph.OpenEdge[Value], []LexerToken, error, *graph.OpenEdge[Value], []LexerToken, error,
) { ) {
if isPunct(toks[0], punctOpenTuple) { if isPunct(toks[0], punctOpenTuple) {
@ -113,7 +113,7 @@ func (d *decoder) parseOpenEdge(
} }
if err != nil { if err != nil {
return graph.OpenEdge[Value]{}, nil, err return nil, nil, err
} }
@ -124,20 +124,20 @@ func (d *decoder) parseOpenEdge(
opTok, toks := toks[0], toks[1:] opTok, toks := toks[0], toks[1:]
if !isPunct(opTok, punctOp) { if !isPunct(opTok, punctOp) {
return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm) return nil, nil, decoderErrf(opTok, "must be %q or %q", punctOp, punctTerm)
} }
if len(toks) == 0 { if len(toks) == 0 {
return graph.OpenEdge[Value]{}, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp) return nil, nil, decoderErrf(opTok, "%q cannot terminate an edge declaration", punctOp)
} }
oe, toks, err := d.parseOpenEdge(toks) oe, toks, err := d.parseOpenEdge(toks)
if err != nil { if err != nil {
return graph.OpenEdge[Value]{}, nil, err return nil, nil, err
} }
oe = graph.TupleOut[Value]([]graph.OpenEdge[Value]{oe}, val) oe = graph.TupleOut[Value]([]*graph.OpenEdge[Value]{oe}, val)
return oe, toks, nil return oe, toks, nil
} }
@ -145,17 +145,17 @@ func (d *decoder) parseOpenEdge(
func (d *decoder) parseTuple( func (d *decoder) parseTuple(
toks []LexerToken, toks []LexerToken,
) ( ) (
graph.OpenEdge[Value], []LexerToken, error, *graph.OpenEdge[Value], []LexerToken, error,
) { ) {
openTok, toks := toks[0], toks[1:] openTok, toks := toks[0], toks[1:]
var edges []graph.OpenEdge[Value] var edges []*graph.OpenEdge[Value]
for { for {
if len(toks) == 0 { if len(toks) == 0 {
return graph.OpenEdge[Value]{}, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple) return nil, nil, decoderErrf(openTok, "no matching %q", punctCloseTuple)
} else if isPunct(toks[0], punctCloseTuple) { } else if isPunct(toks[0], punctCloseTuple) {
toks = toks[1:] toks = toks[1:]
@ -163,14 +163,14 @@ func (d *decoder) parseTuple(
} }
var ( var (
oe graph.OpenEdge[Value] oe *graph.OpenEdge[Value]
err error err error
) )
oe, toks, err = d.parseOpenEdge(toks) oe, toks, err = d.parseOpenEdge(toks)
if err != nil { if err != nil {
return graph.OpenEdge[Value]{}, nil, err return nil, nil, err
} }
edges = append(edges, oe) edges = append(edges, oe)

View File

@ -21,15 +21,15 @@ func TestDecoder(t *testing.T) {
return Value{Name: &n} return Value{Name: &n}
} }
vOut := func(val, edgeVal Value) graph.OpenEdge[Value] { vOut := func(val, edgeVal Value) *graph.OpenEdge[Value] {
return graph.ValueOut(val, edgeVal) return graph.ValueOut(val, edgeVal)
} }
tOut := func(ins []graph.OpenEdge[Value], edgeVal Value) graph.OpenEdge[Value] { tOut := func(ins []*graph.OpenEdge[Value], edgeVal Value) *graph.OpenEdge[Value] {
return graph.TupleOut(ins, edgeVal) return graph.TupleOut(ins, edgeVal)
} }
type openEdge = graph.OpenEdge[Value] type openEdge = *graph.OpenEdge[Value]
tests := []struct { tests := []struct {
in string in string

View File

@ -1,4 +1,4 @@
// Package gg implements ginger graph creation, traversal, and (de)serialization // Package gg implements graph serialization to/from the gg text format.
package gg package gg
import ( import (
@ -10,7 +10,7 @@ import (
// ZeroValue is a Value with no fields set. // ZeroValue is a Value with no fields set.
var ZeroValue Value var ZeroValue Value
// Value represents a value being stored in a Graph. // Value represents a value which can be serialized by the gg text format.
type Value struct { type Value struct {
// Only one of these fields may be set // Only one of these fields may be set

View File

@ -7,7 +7,9 @@ import (
"strings" "strings"
) )
// Value is any value which can be stored within a Graph. // Value is any value which can be stored within a Graph. Values should be
// considered immutable, ie once used with the graph package their internal
// value does not change.
type Value interface { type Value interface {
Equal(Value) bool Equal(Value) bool
String() string String() string
@ -17,12 +19,12 @@ type Value interface {
// constructing graphs. It has no meaning on its own. // constructing graphs. It has no meaning on its own.
type OpenEdge[V Value] struct { type OpenEdge[V Value] struct {
val *V val *V
tup []OpenEdge[V] tup []*OpenEdge[V]
edgeVal V edgeVal V
} }
func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool { func (oe *OpenEdge[V]) equal(oe2 *OpenEdge[V]) bool {
if !oe.edgeVal.Equal(oe2.edgeVal) { if !oe.edgeVal.Equal(oe2.edgeVal) {
return false return false
} }
@ -44,7 +46,7 @@ func (oe OpenEdge[V]) equal(oe2 OpenEdge[V]) bool {
return true return true
} }
func (oe OpenEdge[V]) String() string { func (oe *OpenEdge[V]) String() string {
vertexType := "tup" vertexType := "tup"
@ -73,9 +75,10 @@ func (oe OpenEdge[V]) String() string {
// the previous edge value. // the previous edge value.
// //
// NOTE I _think_ this can be factored out once Graph is genericized. // NOTE I _think_ this can be factored out once Graph is genericized.
func (oe OpenEdge[V]) WithEdgeValue(val V) OpenEdge[V] { func (oe *OpenEdge[V]) WithEdgeValue(val V) *OpenEdge[V] {
oe.edgeVal = val oeCp := *oe
return oe oeCp.edgeVal = val
return &oeCp
} }
// EdgeValue returns the Value which lies on the edge itself. // EdgeValue returns the Value which lies on the edge itself.
@ -96,7 +99,7 @@ func (oe OpenEdge[V]) FromValue() (V, bool) {
// FromTuple returns the tuple of OpenEdges from which the OpenEdge was created // FromTuple returns the tuple of OpenEdges from which the OpenEdge was created
// via TupleOut, or false if it wasn't created via TupleOut. // via TupleOut, or false if it wasn't created via TupleOut.
func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) { func (oe OpenEdge[V]) FromTuple() ([]*OpenEdge[V], bool) {
if oe.val != nil { if oe.val != nil {
return nil, false return nil, false
} }
@ -105,37 +108,37 @@ func (oe OpenEdge[V]) FromTuple() ([]OpenEdge[V], bool) {
} }
// ValueOut creates a OpenEdge which, when used to construct a Graph, represents // ValueOut creates a OpenEdge which, when used to construct a Graph, represents
// an edge (with edgeVal attached to it) coming from the ValueVertex containing // an edge (with edgeVal attached to it) coming from the vertex containing val.
// val. func ValueOut[V Value](val, edgeVal V) *OpenEdge[V] {
func ValueOut[V Value](val, edgeVal V) OpenEdge[V] { return &OpenEdge[V]{
return OpenEdge[V]{val: &val, edgeVal: edgeVal} val: &val,
edgeVal: edgeVal,
}
} }
// TupleOut creates an OpenEdge which, when used to construct a Graph, // TupleOut creates an OpenEdge which, when used to construct a Graph,
// represents an edge (with edgeVal attached to it) coming from the // represents an edge (with edgeVal attached to it) coming from the vertex
// TupleVertex comprised of the given ordered-set of input edges. // comprised of the given ordered-set of input edges.
// func TupleOut[V Value](ins []*OpenEdge[V], edgeVal V) *OpenEdge[V] {
// If len(ins) == 1 && edgeVal.IsZero(), then that single OpenEdge is
// returned as-is.
func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] {
if len(ins) == 1 { if len(ins) == 1 {
in := ins[0] var (
var zero V zero V
in = ins[0]
)
if edgeVal.Equal(zero) { if edgeVal.Equal(zero) {
return in return in
} }
if in.edgeVal.Equal(zero) { if in.edgeVal.Equal(zero) {
in.edgeVal = edgeVal return in.WithEdgeValue(edgeVal)
return in
} }
} }
return OpenEdge[V]{ return &OpenEdge[V]{
tup: ins, tup: ins,
edgeVal: edgeVal, edgeVal: edgeVal,
} }
@ -143,39 +146,11 @@ func TupleOut[V Value](ins []OpenEdge[V], edgeVal V) OpenEdge[V] {
type graphValueIn[V Value] struct { type graphValueIn[V Value] struct {
val V val V
edges []OpenEdge[V] edge *OpenEdge[V]
}
func (valIn graphValueIn[V]) cp() graphValueIn[V] {
cp := valIn
cp.edges = make([]OpenEdge[V], len(valIn.edges))
copy(cp.edges, valIn.edges)
return valIn
} }
func (valIn graphValueIn[V]) equal(valIn2 graphValueIn[V]) bool { func (valIn graphValueIn[V]) equal(valIn2 graphValueIn[V]) bool {
if !valIn.val.Equal(valIn2.val) { return valIn.val.Equal(valIn2.val) && valIn.edge.equal(valIn2.edge)
return false
}
if len(valIn.edges) != len(valIn2.edges) {
return false
}
outer:
for _, edge := range valIn.edges {
for _, edge2 := range valIn2.edges {
if edge.equal(edge2) {
continue outer
}
}
return false
}
return true
} }
// Graph is an immutable container of a set of vertices. The Graph keeps track // Graph is an immutable container of a set of vertices. The Graph keeps track
@ -186,13 +161,16 @@ outer:
// lots of O(N) operations, unnecessary copying on changes, and duplicate data // lots of O(N) operations, unnecessary copying on changes, and duplicate data
// in memory. // in memory.
type Graph[V Value] struct { type Graph[V Value] struct {
edges []*OpenEdge[V]
valIns []graphValueIn[V] valIns []graphValueIn[V]
} }
func (g *Graph[V]) cp() *Graph[V] { func (g *Graph[V]) cp() *Graph[V] {
cp := &Graph[V]{ cp := &Graph[V]{
edges: make([]*OpenEdge[V], len(g.edges)),
valIns: make([]graphValueIn[V], len(g.valIns)), valIns: make([]graphValueIn[V], len(g.valIns)),
} }
copy(cp.edges, g.edges)
copy(cp.valIns, g.valIns) copy(cp.valIns, g.valIns)
return cp return cp
} }
@ -202,55 +180,82 @@ func (g *Graph[V]) String() string {
var strs []string var strs []string
for _, valIn := range g.valIns { for _, valIn := range g.valIns {
for _, oe := range valIn.edges { strs = append(
strs = append( strs,
strs, fmt.Sprintf("valIn(%s, %s)", valIn.edge.String(), valIn.val.String()),
fmt.Sprintf("valIn(%s, %s)", oe.String(), valIn.val.String()), )
)
}
} }
return fmt.Sprintf("graph(%s)", strings.Join(strs, ", ")) return fmt.Sprintf("graph(%s)", strings.Join(strs, ", "))
} }
// ValueIns returns, if any, all OpenEdges which lead to the given Value in the // NOTE this method is used more for its functionality than for any performance
// Graph (ie, all those added via AddValueIn). // reasons... it's incredibly inefficient in how it deduplicates edges, but by
func (g *Graph[V]) ValueIns(val Value) []OpenEdge[V] { // doing the deduplication we enable the graph map operation to work correctly.
for _, valIn := range g.valIns { func (g *Graph[V]) dedupeEdge(edge *OpenEdge[V]) *OpenEdge[V] {
if valIn.val.Equal(val) {
return valIn.cp().edges // check if there's an existing edge which is fully equivalent in the graph
// already, and if so return that.
for i := range g.edges {
if g.edges[i].equal(edge) {
return g.edges[i]
} }
} }
return nil // if this edge is a value edge then there's nothing else to do, return it.
if _, ok := edge.FromValue(); ok {
return edge
}
// this edge is a tuple edge, it's possible that one of its sub-edges is
// already in the graph. dedupe each sub-edge individually.
tupEdges := make([]*OpenEdge[V], len(edge.tup))
for i := range edge.tup {
tupEdges[i] = g.dedupeEdge(edge.tup[i])
}
return TupleOut(tupEdges, edge.EdgeValue())
}
// ValueIns returns, if any, all OpenEdges which lead to the given Value in the
// Graph (ie, all those added via AddValueIn).
//
// The returned slice should not be modified.
func (g *Graph[V]) ValueIns(val Value) []*OpenEdge[V] {
var edges []*OpenEdge[V]
for _, valIn := range g.valIns {
if valIn.val.Equal(val) {
edges = append(edges, valIn.edge)
}
}
return edges
} }
// AddValueIn takes a OpenEdge and connects it to the Value vertex containing // AddValueIn takes a OpenEdge and connects it to the Value vertex containing
// val, returning the new Graph which reflects that connection. // val, returning the new Graph which reflects that connection.
func (g *Graph[V]) AddValueIn(oe OpenEdge[V], val V) *Graph[V] { func (g *Graph[V]) AddValueIn(oe *OpenEdge[V], val V) *Graph[V] {
edges := g.ValueIns(val) valIn := graphValueIn[V]{
val: val,
edge: oe,
}
for _, existingOE := range edges { for i := range g.valIns {
if existingOE.equal(oe) { if g.valIns[i].equal(valIn) {
return g return g
} }
} }
// ValueIns returns a copy of edges, so we're ok to modify it. valIn.edge = g.dedupeEdge(valIn.edge)
edges = append(edges, oe)
valIn := graphValueIn[V]{val: val, edges: edges}
g = g.cp() g = g.cp()
for i, existingValIn := range g.valIns {
if existingValIn.val.Equal(val) {
g.valIns[i] = valIn
return g
}
}
g.valIns = append(g.valIns, valIn) g.valIns = append(g.valIns, valIn)
return g return g
} }

View File

@ -41,7 +41,7 @@ func TestEqual(t *testing.T) {
}, },
{ {
a: zeroGraph.AddValueIn(ValueOut[S]("in", "incr"), "out"), a: zeroGraph.AddValueIn(ValueOut[S]("in", "incr"), "out"),
b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
ValueOut[S]("in", "ident"), ValueOut[S]("in", "ident"),
ValueOut[S]("1", "ident"), ValueOut[S]("1", "ident"),
}, "add"), "out"), }, "add"), "out"),
@ -49,11 +49,11 @@ func TestEqual(t *testing.T) {
}, },
{ {
// tuples are different order // tuples are different order
a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
ValueOut[S]("1", "ident"), ValueOut[S]("1", "ident"),
ValueOut[S]("in", "ident"), ValueOut[S]("in", "ident"),
}, "add"), "out"), }, "add"), "out"),
b: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ b: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
ValueOut[S]("in", "ident"), ValueOut[S]("in", "ident"),
ValueOut[S]("1", "ident"), ValueOut[S]("1", "ident"),
}, "add"), "out"), }, "add"), "out"),
@ -62,7 +62,7 @@ func TestEqual(t *testing.T) {
{ {
// tuple with no edge value and just a single input edge should be // tuple with no edge value and just a single input edge should be
// equivalent to just that edge. // equivalent to just that edge.
a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
ValueOut[S]("1", "ident"), ValueOut[S]("1", "ident"),
}, zeroValue), "out"), }, zeroValue), "out"),
b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"), b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"),
@ -72,7 +72,7 @@ func TestEqual(t *testing.T) {
// tuple with an edge value and just a single input edge that has no // tuple with an edge value and just a single input edge that has no
// edgeVal should be equivalent to just that edge with the tuple's // edgeVal should be equivalent to just that edge with the tuple's
// edge value. // edge value.
a: zeroGraph.AddValueIn(TupleOut[S]([]OpenEdge[S]{ a: zeroGraph.AddValueIn(TupleOut[S]([]*OpenEdge[S]{
ValueOut[S]("1", zeroValue), ValueOut[S]("1", zeroValue),
}, "ident"), "out"), }, "ident"), "out"),
b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"), b: zeroGraph.AddValueIn(ValueOut[S]("1", "ident"), "out"),

View File

@ -17,12 +17,12 @@ var (
// The Scope passed into Perform can be used to Evaluate the OpenEdge, as // The Scope passed into Perform can be used to Evaluate the OpenEdge, as
// needed. // needed.
type Operation interface { type Operation interface {
Perform(graph.OpenEdge[gg.Value], Scope) (Value, error) Perform(*graph.OpenEdge[gg.Value], Scope) (Value, error)
} }
func preEvalValOp(fn func(Value) (Value, error)) Operation { func preEvalValOp(fn func(Value) (Value, error)) Operation {
return OperationFunc(func(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { return OperationFunc(func(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
edgeVal, err := EvaluateEdge(edge, scope) edgeVal, err := EvaluateEdge(edge, scope)
@ -41,15 +41,15 @@ func preEvalValOp(fn func(Value) (Value, error)) Operation {
// //
// This also doesn't yet support passing an operation as a value to another // This also doesn't yet support passing an operation as a value to another
// operation. // operation.
func preEvalEdgeOp(fn func(graph.OpenEdge[gg.Value]) (Value, error)) Operation { func preEvalEdgeOp(fn func(*graph.OpenEdge[gg.Value]) (Value, error)) Operation {
return preEvalValOp(func(val Value) (Value, error) { return preEvalValOp(func(val Value) (Value, error) {
var edge graph.OpenEdge[gg.Value] var edge *graph.OpenEdge[gg.Value]
if len(val.Tuple) > 0 { if len(val.Tuple) > 0 {
tupEdges := make([]graph.OpenEdge[gg.Value], len(val.Tuple)) tupEdges := make([]*graph.OpenEdge[gg.Value], len(val.Tuple))
for i := range val.Tuple { for i := range val.Tuple {
tupEdges[i] = graph.ValueOut[gg.Value](val.Tuple[i].Value, gg.ZeroValue) tupEdges[i] = graph.ValueOut[gg.Value](val.Tuple[i].Value, gg.ZeroValue)
@ -87,9 +87,9 @@ func OperationFromGraph(g *graph.Graph[gg.Value], scope Scope) Operation {
} }
} }
func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { func (g *graphOp) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
return preEvalEdgeOp(func(edge graph.OpenEdge[gg.Value]) (Value, error) { return preEvalEdgeOp(func(edge *graph.OpenEdge[gg.Value]) (Value, error) {
scope = ScopeFromGraph( scope = ScopeFromGraph(
g.Graph.AddValueIn(edge, inVal.Value), g.Graph.AddValueIn(edge, inVal.Value),
@ -103,9 +103,9 @@ func (g *graphOp) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, er
} }
// OperationFunc is a function which implements the Operation interface. // OperationFunc is a function which implements the Operation interface.
type OperationFunc func(graph.OpenEdge[gg.Value], Scope) (Value, error) type OperationFunc func(*graph.OpenEdge[gg.Value], Scope) (Value, error)
// Perform calls the underlying OperationFunc directly. // Perform calls the underlying OperationFunc directly.
func (f OperationFunc) Perform(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { func (f OperationFunc) Perform(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
return f(edge, scope) return f(edge, scope)
} }

View File

@ -23,7 +23,7 @@ type Scope interface {
// edgeToValue ignores the edgeValue, it only evaluates the edge's vertex as a // edgeToValue ignores the edgeValue, it only evaluates the edge's vertex as a
// Value. // Value.
func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { func edgeToValue(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
if ggVal, ok := edge.FromValue(); ok { if ggVal, ok := edge.FromValue(); ok {
@ -61,7 +61,7 @@ func edgeToValue(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
// EvaluateEdge will use the given Scope to evaluate the edge's ultimate Value, // EvaluateEdge will use the given Scope to evaluate the edge's ultimate Value,
// after passing all leaf vertices up the tree through all Operations found on // after passing all leaf vertices up the tree through all Operations found on
// edge values. // edge values.
func EvaluateEdge(edge graph.OpenEdge[gg.Value], scope Scope) (Value, error) { func EvaluateEdge(edge *graph.OpenEdge[gg.Value], scope Scope) (Value, error) {
edgeVal := Value{Value: edge.EdgeValue()} edgeVal := Value{Value: edge.EdgeValue()}