initial implementation of immutable Graph

This commit is contained in:
Brian Picciano 2017-10-21 12:39:23 -06:00
parent d0260a7d68
commit 9b577c0df8
2 changed files with 624 additions and 0 deletions

314
lang/gg/gg.go Normal file
View File

@ -0,0 +1,314 @@
// Package gg implements ginger graph creation, traversal, and (de)serialization
package gg
import (
"crypto/md5"
"encoding/hex"
"fmt"
"hash"
)
// Identifier is implemented by any value which can uniquely identify itself.
// It does so by writing its identifying data into a hash; the identify helper
// in this package turns the resulting digest into a string ID.
type Identifier interface {
// Identify writes the data which identifies the value into the given hash
Identify(hash.Hash)
}
// identify computes the string ID of an Identifier. The value feeds its
// identifying data into a fresh MD5 hash, and the hex encoding of the resulting
// digest is returned.
func identify(i Identifier) string {
	hasher := md5.New()
	i.Identify(hasher)
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}
// VertexType enumerates the different possible vertex types. It is embedded in
// both the exported Vertex and the internal vertex representation.
type VertexType string
const (
// Value is a Vertex which contains exactly one value and has at least one
// edge (either input or output). Value vertices are de-duplicated on the
// value they contain.
Value VertexType = "value"
// Junction is a Vertex which contains two or more in edges and exactly one
// out edge. Junction vertices are de-duplicated on their ordered input
// edges.
Junction VertexType = "junction"
)
// Edge is a uni-directional connection between two vertices with an attribute
// value
type Edge struct {
// From is the Vertex which the Edge leaves
From *Vertex
// Value is the attribute value attached to the Edge
Value Identifier
// To is the Vertex which the Edge enters
To *Vertex
}
// Vertex is a vertex in a Graph. No fields should be modified directly, only
// through method calls
type Vertex struct {
VertexType
Value Identifier // Value is valid if-and-only-if VertexType is Value
// In holds the Edges which enter this Vertex, Out the Edges which leave it.
// Both are filled in when the Graph's view is built.
In, Out []Edge
}
////////////////////////////////////////////////////////////////////////////////
// HalfEdge is an un-realized Edge which can't be used for anything except
// constructing graphs. It has no meaning on its own. HalfEdges are created
// with ValueOut or JunctionOut and realized by passing them to Graph.ValueIn.
type HalfEdge struct {
// fromV is the source vertex, described as if neither it nor any of its
// sub-vertices already exist in the graph. If it or any of its sub-vertices
// do already exist, that needs to be taken into account when persisting the
// HalfEdge into the graph.
fromV vertex
// val is the attribute value attached to the edge
val Identifier
}
// Identify implements the Identifier interface. A HalfEdge is identified by a
// fixed "halfEdge" marker line, followed by the identity of its source vertex,
// followed by the identity of its edge value.
func (e HalfEdge) Identify(h hash.Hash) {
	fmt.Fprint(h, "halfEdge\n")
	e.fromV.Identify(h)
	e.val.Identify(h)
}
// vertex is the internal representation of a vertex in the graph. Each Graph
// holds the set of all the Value vertex instances it knows about, and each of
// those holds all the input HalfEdges which are known for it. So you can think
// of these "top-level" Value vertex instances as root nodes in a tree, and
// each HalfEdge as a branch.
//
// If a HalfEdge contains a fromV which is a Value, that vertex won't have its
// in slice populated no matter what. If fromV is a Junction its in slice will
// be populated, with any Value vertices beneath it again not being populated,
// and so on recursively.
//
// When a view is constructed in makeView these Value instances are
// de-duplicated, and the in slice of the top-level instance is the one used to
// properly connect the resulting Vertex.
type vertex struct {
VertexType
val Identifier // the contained value; JunctionOut leaves this unset
in []HalfEdge // known input edges, see the type comment for when it's populated
}
// Identify writes the vertex's identity into h.
//
// A Value vertex is unique by the value it contains, so a "value" marker line
// and the identity of that value are written.
//
// A Junction vertex is unique by its input edges, so a "junction:<count>"
// marker line and the identity of each input edge, in order, are written.
//
// Any other VertexType is a programming error and causes a panic.
func (v vertex) Identify(h hash.Hash) {
	if v.VertexType == Value {
		fmt.Fprintln(h, "value")
		v.val.Identify(h)
		return
	}

	if v.VertexType == Junction {
		fmt.Fprintf(h, "junction:%d\n", len(v.in))
		for i := range v.in {
			v.in[i].Identify(h)
		}
		return
	}

	panic(fmt.Sprintf("invalid VertexType:%#v", v))
}
// cp returns a copy of the vertex whose in slice is backed by its own array,
// so that appending to or modifying the copy's in slice can't affect the
// original. The HalfEdges themselves are copied by value.
func (v vertex) cp() vertex {
	// v is already a copy of the receiver, only the slice needs detaching
	v.in = append(make([]HalfEdge, 0, len(v.in)), v.in...)
	return v
}
// hasHalfEdge returns true if one of the vertex's input HalfEdges has the same
// ID (as computed by identify) as the given one.
func (v vertex) hasHalfEdge(he HalfEdge) bool {
	wantID := identify(he)
	for i := range v.in {
		if wantID == identify(v.in[i]) {
			return true
		}
	}
	return false
}
// Graph is a wrapper around a set of connected Vertices. Graphs are built up
// from Null using ValueIn, which returns a Graph reflecting the new connection
// rather than modifying the vertices of its receiver.
type Graph struct {
// vM is keyed by the ID which identify returns for each vertex
vM map[string]vertex // only contains value vertices
// view is the traversable form of the Graph, keyed by the ID of each Value
// vertex's value. It is built by makeView the first time it's needed; cp
// leaves it nil on the copy.
view map[string]*Vertex
}
// Null is the root empty graph, and is the base off which all graphs are
// built. Its view is pre-populated (as empty) so that it never needs to be
// constructed.
var Null = &Graph{
	vM:   make(map[string]vertex),
	view: make(map[string]*Vertex),
}
// cp returns a new Graph containing the same vertices as g. The vertex map is
// copied entry by entry, so adding to or removing from the copy's map doesn't
// affect g.
//
// The view is deliberately _not_ copied (it's left nil), as it's assumed the
// only reason to copy a graph is to modify it anyway, which would make the old
// view stale.
func (g *Graph) cp() *Graph {
	dup := &Graph{vM: make(map[string]vertex, len(g.vM))}
	for id := range g.vM {
		dup.vM[id] = g.vM[id]
	}
	return dup
}
////////////////////////////////////////////////////////////////////////////////
// Graph creation
// ValueOut creates a HalfEdge which, when used to construct a Graph,
// represents an edge leaving the Value Vertex containing val. edgeVal is the
// value attached to that edge.
//
// Value vertices are de-duplicated on their value when a Graph is constructed,
// so multiple ValueOut HalfEdges created with the same val will all be leaving
// the same Vertex instance in the constructed Graph.
func ValueOut(val, edgeVal Identifier) HalfEdge {
	src := vertex{VertexType: Value, val: val}
	return HalfEdge{fromV: src, val: edgeVal}
}
// JunctionOut creates a HalfEdge which, when used to construct a Graph,
// represents an edge (with edgeVal attached to it) leaving the Junction Vertex
// comprised of the given ordered-set of input edges.
//
// When constructing Graphs Junction vertices are de-duplicated on their input
// edges. So multiple Junction HalfEdges constructed with the same set of input
// edges will be leaving the same Junction instance in the constructed Graph.
//
// The in slice is copied, so the caller is free to modify or re-use it after
// JunctionOut returns without affecting the returned HalfEdge (or any Graph
// constructed from it).
func JunctionOut(in []HalfEdge, edgeVal Identifier) HalfEdge {
	// a Junction's identity is derived from its input edges, so it must not
	// share a backing array with a slice the caller can still write to
	inCp := make([]HalfEdge, len(in))
	copy(inCp, in)

	return HalfEdge{
		fromV: vertex{
			VertexType: Junction,
			in:         inCp,
		},
		val: edgeVal,
	}
}
// ValueIn takes a HalfEdge and connects it to the Value Vertex containing val,
// and returns the new Graph which reflects that connection. Any Vertices
// referenced within the HalfEdge which do not yet exist in the Graph will also
// be created in this step.
//
// The vertices of the receiver are never modified. If the Value Vertex
// containing val already has an identical incoming edge then there is nothing
// to do and the receiver itself is returned.
func (g *Graph) ValueIn(he HalfEdge, val Identifier) *Graph {
	to := vertex{
		VertexType: Value,
		val:        val,
	}
	toID := identify(to)

	// if to is already in the graph, pull it out, as it might have existing in
	// edges we want to keep
	if exTo, ok := g.vM[toID]; ok {
		to = exTo
	}

	// if the incoming edge already exists in to then there's nothing to do
	if to.hasHalfEdge(he) {
		return g
	}

	// copy before appending so the in slice held by the receiver's vertex is
	// never written to
	to = to.cp()
	to.in = append(to.in, he)
	g = g.cp()

	// starting with to (which we always overwrite) go through vM and
	// recursively add in any vertices which aren't already there
	var persist func(vertex)
	persist = func(v vertex) {
		// only Value vertices are stored in vM, so only they need an ID.
		// Hashing a Junction walks its entire subtree, which would be wasted
		// work here since persist descends into that subtree anyway.
		if v.VertexType == Value {
			vID := identify(v)
			if _, ok := g.vM[vID]; !ok {
				g.vM[vID] = v
			}
		}
		for _, e := range v.in {
			persist(e.fromV)
		}
	}

	delete(g.vM, toID)
	persist(to)
	return g
}
// TODO Merge
////////////////////////////////////////////////////////////////////////////////
// Graph traversal
// makeView builds g.view, the traversable form of the Graph made up of
// interconnected Vertex instances, from the vertices in g.vM. If the view has
// already been built this does nothing.
func (g *Graph) makeView() {
	if g.view != nil {
		return
	}

	// view only contains value vertices, but we need to keep track of all
	// vertices (junctions too) while constructing the view
	g.view = make(map[string]*Vertex, len(g.vM))
	seen := map[string]*Vertex{}

	var resolve func(vertex, bool) *Vertex
	resolve = func(v vertex, top bool) *Vertex {
		key := identify(v)
		node, known := seen[key]
		if !known {
			node = &Vertex{VertexType: v.VertexType, Value: v.val}
			seen[key] = node
		}

		// every Value vertex will be resolved with top==true at some point
		// (via the loop over vM below), so the input edges only need to be
		// descended into when either:
		// * top is true
		// * this is a junction being resolved for the first time
		firstJunction := !known && v.VertexType == Junction
		if !(top || firstJunction) {
			return node
		}

		node.In = make([]Edge, 0, len(v.in))
		for _, he := range v.in {
			src := resolve(he.fromV, false)
			e := Edge{From: src, Value: he.val, To: node}
			// the same Edge value is recorded on both of its ends
			src.Out = append(src.Out, e)
			node.In = append(node.In, e)
		}

		if v.VertexType == Value {
			g.view[identify(v.val)] = node
		}
		return node
	}

	for _, v := range g.vM {
		resolve(v, true)
	}
}
// Value returns the Value Vertex which contains the given value, building the
// Graph's view first if that hasn't been done yet. If the Graph doesn't
// contain a vertex for the value then nil is returned.
func (g *Graph) Value(val Identifier) *Vertex {
	g.makeView()
	found, ok := g.view[identify(val)]
	if !ok {
		return nil
	}
	return found
}
// Equal returns whether or not the two Graphs are equivalent in value. Two
// Graphs are equivalent if they contain the same Value vertices, and each of
// those vertices has the same set of input edges in both Graphs (regardless of
// the order in which those edges were added).
func Equal(g1, g2 *Graph) bool {
	// a graph is always equal to itself
	if g1 == g2 {
		return true
	}

	if len(g1.vM) != len(g2.vM) {
		return false
	}

	for v1ID, v1 := range g1.vM {
		v2, ok := g2.vM[v1ID]
		if !ok {
			return false
		}

		// since the vertices are values we must make sure their input sets are
		// the same (which is tricky since they're unordered, unlike a
		// junction's)
		if len(v1.in) != len(v2.in) {
			return false
		}

		// hash each of v2's input edges exactly once, rather than re-hashing
		// all of them for every one of v1's input edges
		v2In := make(map[string]struct{}, len(v2.in))
		for _, in := range v2.in {
			v2In[identify(in)] = struct{}{}
		}
		for _, in := range v1.in {
			if _, ok := v2In[identify(in)]; !ok {
				return false
			}
		}
	}
	return true
}

310
lang/gg/gg_test.go Normal file
View File

@ -0,0 +1,310 @@
package gg
import (
"fmt"
"hash"
. "testing"
"github.com/stretchr/testify/assert"
)
// idAny wraps an arbitrary value so that it can be used as an Identifier in
// tests
type idAny struct {
i interface{}
}
// Identify implements the Identifier interface by writing the default fmt
// representation of the wrapper, followed by a newline, into the hash.
func (i idAny) Identify(h hash.Hash) {
	fmt.Fprintf(h, "%v\n", i)
}
// id wraps the given value in an idAny so that it can be used wherever an
// Identifier is needed.
func id(i interface{}) Identifier {
	var wrapped idAny
	wrapped.i = i
	return wrapped
}
// edge constructs an expected Edge which carries val and leaves the from
// Vertex. The To field is left unset.
func edge(val string, from *Vertex) Edge {
	var e Edge
	e.From = from
	e.Value = id(val)
	return e
}
// value constructs an expected Value Vertex which contains val and has the
// given input edges. Its Out edges are left unset.
func value(val string, in ...Edge) *Vertex {
	v := new(Vertex)
	v.VertexType = Value
	v.Value = id(val)
	v.In = in
	return v
}
// junction constructs an expected Edge which carries val and leaves a new
// Junction Vertex having the given input edges.
func junction(val string, in ...Edge) Edge {
	src := &Vertex{VertexType: Junction, In: in}
	return Edge{From: src, Value: id(val)}
}
// assertVertexEqual asserts that got has the same type, value, and
// (recursively) the same input edges as exp, reporting any differences on t.
// Additionally it checks that the edges of got are wired up consistently: each
// input edge's To must be the vertex it's an input of, and must also be
// present in the Out edges of its From vertex.
//
// exp's Out edges and its edges' To fields are never looked at, so expected
// vertices may be built using only the value/edge/junction helpers.
//
// Returns true only if every assertion passed.
func assertVertexEqual(t *T, exp, got *Vertex, msgAndArgs ...interface{}) bool {
	var assertInner func(*Vertex, *Vertex, map[*Vertex]bool) bool
	assertInner = func(exp, got *Vertex, m map[*Vertex]bool) bool {
		// if got is already in m then we've already looked at it. This is what
		// stops the recursion when the graph is circular
		if m[got] {
			return true
		}
		m[got] = true

		// every assertion is always evaluated (so all differences get
		// reported), the results are then and-ed together
		ok := assert.Equal(t, exp.VertexType, got.VertexType, msgAndArgs...)
		ok = assert.Equal(t, exp.Value, got.Value, msgAndArgs...) && ok

		// indexing below is only safe if the lengths match
		if !assert.Len(t, got.In, len(exp.In), msgAndArgs...) {
			return false
		}

		for i := range exp.In {
			ok = assertInner(exp.In[i].From, got.In[i].From, m) && ok
			ok = assert.Equal(t, exp.In[i].Value, got.In[i].Value, msgAndArgs...) && ok
			ok = assert.Equal(t, got, got.In[i].To, msgAndArgs...) && ok
			ok = assert.Contains(t, got.In[i].From.Out, got.In[i], msgAndArgs...) && ok
		}
		return ok
	}
	return assertInner(exp, got, map[*Vertex]bool{})
}
// graphTest is a single case in the TestGraph table
type graphTest struct {
// name identifies the case in failure messages
name string
// out constructs the Graph being tested
out func() *Graph
// exp holds the Value vertices expected to be in the Graph. Each is looked
// up in the Graph by its Value and compared using assertVertexEqual.
exp []*Vertex
}
// mkTest is a convenience constructor for graphTest which allows the expected
// vertices to be given variadically.
func mkTest(name string, out func() *Graph, exp ...*Vertex) graphTest {
	var gt graphTest
	gt.name, gt.out, gt.exp = name, out, exp
	return gt
}
// TestGraph is a table test of Graph construction. Each case constructs a
// Graph using ValueIn, then every expected Value vertex is looked up in that
// Graph and compared (including everything reachable through its input edges)
// against what was found.
func TestGraph(t *T) {
tests := []graphTest{
// a single edge between two values
mkTest(
"values-basic",
func() *Graph {
return Null.ValueIn(ValueOut(id("v0"), id("e0")), id("v1"))
},
value("v0"),
value("v1", edge("e0", value("v0"))),
),
// two different values each with an edge into the same value
mkTest(
"values-2edges",
func() *Graph {
g0 := Null.ValueIn(ValueOut(id("v0"), id("e0")), id("v2"))
return g0.ValueIn(ValueOut(id("v1"), id("e1")), id("v2"))
},
value("v0"),
value("v1"),
value("v2",
edge("e0", value("v0")),
edge("e1", value("v1")),
),
),
// two pairs of values which aren't connected to each other
mkTest(
"values-separate",
func() *Graph {
g0 := Null.ValueIn(ValueOut(id("v0"), id("e0")), id("v1"))
return g0.ValueIn(ValueOut(id("v2"), id("e2")), id("v3"))
},
value("v0"),
value("v1", edge("e0", value("v0"))),
value("v2"),
value("v3", edge("e2", value("v2"))),
),
// a value with an edge into itself
mkTest(
"values-circular",
func() *Graph {
return Null.ValueIn(ValueOut(id("v0"), id("e")), id("v0"))
},
value("v0", edge("e", value("v0"))),
),
// two values with an edge into each other
mkTest(
"values-circular2",
func() *Graph {
g0 := Null.ValueIn(ValueOut(id("v0"), id("e0")), id("v1"))
return g0.ValueIn(ValueOut(id("v1"), id("e1")), id("v0"))
},
value("v0", edge("e1", value("v1", edge("e0", value("v0"))))),
value("v1", edge("e0", value("v0", edge("e1", value("v1"))))),
),
// a cycle between v1 and v2, with v0 leading into it
mkTest(
"values-circular3",
func() *Graph {
g0 := Null.ValueIn(ValueOut(id("v0"), id("e0")), id("v1"))
g1 := g0.ValueIn(ValueOut(id("v1"), id("e1")), id("v2"))
return g1.ValueIn(ValueOut(id("v2"), id("e2")), id("v1"))
},
value("v0"),
value("v1",
edge("e0", value("v0")),
edge("e2", value("v2", edge("e1", value("v1")))),
),
value("v2", edge("e1", value("v1",
edge("e0", value("v0")),
edge("e2", value("v2")),
))),
),
// two values joined by a junction which leads into a third value
mkTest(
"junction-basic",
func() *Graph {
e0 := ValueOut(id("v0"), id("e0"))
e1 := ValueOut(id("v1"), id("e1"))
ej0 := JunctionOut([]HalfEdge{e0, e1}, id("ej0"))
return Null.ValueIn(ej0, id("v2"))
},
value("v0"), value("v1"),
value("v2", junction("ej0",
edge("e0", value("v0")),
edge("e1", value("v1")),
)),
),
// a junction whose inputs are themselves junctions over the same two values
mkTest(
"junction-basic2",
func() *Graph {
e00 := ValueOut(id("v0"), id("e00"))
e10 := ValueOut(id("v1"), id("e10"))
ej0 := JunctionOut([]HalfEdge{e00, e10}, id("ej0"))
e01 := ValueOut(id("v0"), id("e01"))
e11 := ValueOut(id("v1"), id("e11"))
ej1 := JunctionOut([]HalfEdge{e01, e11}, id("ej1"))
ej2 := JunctionOut([]HalfEdge{ej0, ej1}, id("ej2"))
return Null.ValueIn(ej2, id("v2"))
},
value("v0"), value("v1"),
value("v2", junction("ej2",
junction("ej0",
edge("e00", value("v0")),
edge("e10", value("v1")),
),
junction("ej1",
edge("e01", value("v0")),
edge("e11", value("v1")),
),
)),
),
// the value which a junction leads into has edges back to both of the
// junction's input values
mkTest(
"junction-circular",
func() *Graph {
e0 := ValueOut(id("v0"), id("e0"))
e1 := ValueOut(id("v1"), id("e1"))
ej0 := JunctionOut([]HalfEdge{e0, e1}, id("ej0"))
g0 := Null.ValueIn(ej0, id("v2"))
e20 := ValueOut(id("v2"), id("e20"))
g1 := g0.ValueIn(e20, id("v0"))
e21 := ValueOut(id("v2"), id("e21"))
return g1.ValueIn(e21, id("v1"))
},
value("v0", edge("e20", value("v2", junction("ej0",
edge("e0", value("v0")),
edge("e1", value("v1", edge("e21", value("v2")))),
)))),
value("v1", edge("e21", value("v2", junction("ej0",
edge("e0", value("v0", edge("e20", value("v2")))),
edge("e1", value("v1")),
)))),
value("v2", junction("ej0",
edge("e0", value("v0", edge("e20", value("v2")))),
edge("e1", value("v1", edge("e21", value("v2")))),
)),
),
}
for i := range tests {
out := tests[i].out()
for j, exp := range tests[i].exp {
// identifies the case and the expected vertex in any failure message
msgAndArgs := []interface{}{
"tests[%d].name:%q exp[%d].val:%q",
i, tests[i].name, j, exp.Value.(idAny).i,
}
v := out.Value(exp.Value)
// assertVertexEqual dereferences v, so a missing vertex must be skipped
if !assert.NotNil(t, v, msgAndArgs...) {
continue
}
assertVertexEqual(t, exp, v, msgAndArgs...)
}
// sanity check that graphs are equal to themselves
assert.True(t, Equal(out, out))
}
}
// TestGraphImmutability checks that ValueIn leaves the Graph it's called on
// untouched, and that a single HalfEdge can be used to construct any number of
// Graphs.
func TestGraphImmutability(t *T) {
	first := ValueOut(id("v0"), id("e0"))
	base := Null.ValueIn(first, id("v1"))

	// Null must not have picked up the vertices which were added to base
	for _, name := range []string{"v0", "v1"} {
		assert.Nil(t, Null.Value(id(name)))
	}
	for _, name := range []string{"v0", "v1"} {
		assert.NotNil(t, base.Value(id(name)))
	}

	// what any vertex which has had shared connected to it should look like
	shared := ValueOut(id("v2"), id("e1"))
	expected := func(name string) *Vertex {
		return value(name, edge("e1", value("v2")))
	}

	// half-edges should be re-usable
	branchA := base.ValueIn(shared, id("v3a"))
	branchB := base.ValueIn(shared, id("v3b"))
	assertVertexEqual(t, expected("v3a"), branchA.Value(id("v3a")))
	assert.Nil(t, branchA.Value(id("v3b")))
	assertVertexEqual(t, expected("v3b"), branchB.Value(id("v3b")))
	assert.Nil(t, branchB.Value(id("v3a")))

	// ... even re-usable twice in succession
	chained := base.ValueIn(shared, id("v3"))
	chained = chained.ValueIn(shared, id("v4"))
	assert.Nil(t, chained.Value(id("v3b")))
	assert.Nil(t, chained.Value(id("v3a")))
	assertVertexEqual(t, expected("v3"), chained.Value(id("v3")))
	assertVertexEqual(t, expected("v4"), chained.Value(id("v4")))
}
// TestGraphEqual checks Equal against pairs of Graphs which should and should
// not be considered equivalent.
func TestGraphEqual(t *T) {
	// both helpers check in both directions, as the order of the arguments to
	// Equal shouldn't matter
	same := func(a, b *Graph) {
		assert.True(t, Equal(a, b))
		assert.True(t, Equal(b, a))
	}
	differ := func(a, b *Graph) {
		assert.False(t, Equal(a, b))
		assert.False(t, Equal(b, a))
	}

	same(Null, Null) // duh

	{
		// graph is equal to itself, not to null
		edge0 := ValueOut(id("v0"), id("e0"))
		base := Null.ValueIn(edge0, id("v1"))
		differ(base, Null)
		same(base, base)

		// adding an existing edge again shouldn't do anything
		same(base, base.ValueIn(edge0, id("v1")))

		// withA and withB have the same vertices, but the edges are different
		withA := base.ValueIn(ValueOut(id("v0"), id("e1a")), id("v2"))
		withB := base.ValueIn(ValueOut(id("v0"), id("e1b")), id("v2"))
		differ(withA, withB)
	}

	{ // equal construction should yield equality, even if out of order
		forward := Null.
			ValueIn(ValueOut(id("v0"), id("e0")), id("v1")).
			ValueIn(ValueOut(id("v1"), id("e1")), id("v2"))
		backward := Null.
			ValueIn(ValueOut(id("v1"), id("e1")), id("v2")).
			ValueIn(ValueOut(id("v0"), id("e0")), id("v1"))
		same(forward, backward)
	}

	{ // junction basic test, the order of a junction's inputs matters
		in0 := ValueOut(id("v0"), id("e0"))
		in1 := ValueOut(id("v1"), id("e1"))
		ordered := Null.ValueIn(JunctionOut([]HalfEdge{in0, in1}, id("ej")), id("v2"))
		swapped := Null.ValueIn(JunctionOut([]HalfEdge{in1, in0}, id("ej")), id("v2"))
		same(ordered, ordered)
		differ(ordered, swapped)
	}
}