ginger/gg/gg.go

// Package gg implements ginger graph creation, traversal, and (de)serialization
package gg

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"strings"
)

// Value wraps a go value in a way such that it will be uniquely identified
// within any Graph and between Graphs. Use NewValue to create a Value instance.
// You can create an instance manually as long as ID is globally unique.
type Value struct {
	ID string
	V  interface{}
}

// NewValue returns a Value instance wrapping any go value. The Value returned
// will be independent of the passed in go value. So if the same go value is
// passed in twice then the two returned Value instances will be treated as
// being different values by Graph.
func NewValue(V interface{}) Value {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		panic(err)
	}
	return Value{
		ID: hex.EncodeToString(b),
		V:  V,
	}
}

// VertexType enumerates the different possible vertex types
type VertexType string

const (
	// ValueVertex is a Vertex which contains exactly one value and has at least
	// one edge (either input or output)
	ValueVertex VertexType = "value"

	// TupleVertex is a Vertex which contains two or more in edges and
	// exactly one out edge
	//
	// TODO ^ what about 0 or 1 in edges?
	TupleVertex VertexType = "tuple"
)

// Edge is a uni-directional connection between two vertices with an attribute
// value
type Edge struct {
	From  *Vertex
	Value Value
	To    *Vertex
}

// Vertex is a vertex in a Graph. No fields should be modified directly, only
// through method calls
type Vertex struct {
	ID string
	VertexType
	Value   Value // Value is valid if-and-only-if VertexType is ValueVertex
	In, Out []Edge
}

////////////////////////////////////////////////////////////////////////////////

// OpenEdge is an un-realized Edge which can't be used for anything except
// constructing graphs. It has no meaning on its own.
type OpenEdge struct {
	// fromV will be the source vertex as-if the vertex (and any sub-vertices of
	// it) doesn't already exist in the graph. If it or it's sub-vertices does
	// already that will need to be taken into account when persisting into the
	// graph
	fromV vertex
	val   Value
}

func (oe OpenEdge) id() string {
	return fmt.Sprintf("(%s,%s)", oe.fromV.id, oe.val.ID)
}

// vertex is a representation of a vertex in the graph. Each Graph contains a
// set of all the Value vertex instances it knows about. Each of these contains
// all the input OpenEdges which are known for it. So you can think of these
// "top-level" Value vertex instances as root nodes in a tree, and each OpenEdge
// as a branch.
//
// If a OpenEdge contains a fromV which is a Value that vertex won't have its in
// slice populated no matter what. If fromV is a Tuple it will be populated,
// with any sub-Value's not being populated and so-on recursively
//
// When a view is constructed in makeView these Value instances are deduplicated
// and the top-level one's in value is used to properly connect it.
type vertex struct {
	id string
	VertexType
	val Value
	in  []OpenEdge
}

func (v vertex) cp() vertex {
	cp := v
	cp.in = make([]OpenEdge, len(v.in))
	copy(cp.in, v.in)
	return cp
}

func (v vertex) hasOpenEdge(oe OpenEdge) bool {
	oeID := oe.id()
	for _, in := range v.in {
		if in.id() == oeID {
			return true
		}
	}
	return false
}

func (v vertex) cpAndDelOpenEdge(oe OpenEdge) (vertex, bool) {
	oeID := oe.id()
	for i, in := range v.in {
		if in.id() == oeID {
			v = v.cp()
			v.in = append(v.in[:i], v.in[i+1:]...)
			return v, true
		}
	}
	return v, false
}

// Graph is a wrapper around a set of connected Vertices
type Graph struct {
	vM map[string]vertex // only contains value vertices

	// generated by makeView on-demand
	byVal map[string]*Vertex
	all   map[string]*Vertex
}

// ZeroGraph is the root empty graph, and is the base off which all graphs are
// built.
var ZeroGraph = &Graph{
	vM:    map[string]vertex{},
	byVal: map[string]*Vertex{},
	all:   map[string]*Vertex{},
}

// this does _not_ copy the view, as it's assumed the only reason to copy a
// graph is to modify it anyway
func (g *Graph) cp() *Graph {
	cp := &Graph{
		vM: make(map[string]vertex, len(g.vM)),
	}
	for vID, v := range g.vM {
		cp.vM[vID] = v
	}
	return cp
}

////////////////////////////////////////////////////////////////////////////////
// Graph creation

func mkVertex(typ VertexType, val Value, ins ...OpenEdge) vertex {
	v := vertex{VertexType: typ, in: ins}
	switch typ {
	case ValueVertex:
		v.id = val.ID
		v.val = val
	case TupleVertex:
		inIDs := make([]string, len(ins))
		for i := range ins {
			inIDs[i] = ins[i].id()
		}
		v.id = "[" + strings.Join(inIDs, ",") + "]"
	default:
		panic(fmt.Sprintf("unknown vertex type %q", typ))
	}
	return v
}

// ValueOut creates a OpenEdge which, when used to construct a Graph, represents
// an edge (with edgeVal attached to it) coming from the ValueVertex containing
// val.
//
// When constructing Graphs, Value vertices are de-duplicated on their Value. So
// multiple ValueOut OpenEdges constructed with the same val will be leaving the
// same Vertex instance in the constructed Graph.
func ValueOut(val, edgeVal Value) OpenEdge {
	return OpenEdge{fromV: mkVertex(ValueVertex, val), val: edgeVal}
}

// TupleOut creates an OpenEdge which, when used to construct a Graph,
// represents an edge (with edgeVal attached to it) coming from the
// TupleVertex comprised of the given ordered-set of input edges.
//
// When constructing Graphs Tuple vertices are de-duplicated on their input
// edges. So multiple Tuple OpenEdges constructed with the same set of input
// edges will be leaving the same Tuple instance in the constructed Graph.
func TupleOut(ins []OpenEdge, edgeVal Value) OpenEdge {
	return OpenEdge{
		fromV: mkVertex(TupleVertex, Value{}, ins...),
		val:   edgeVal,
	}
}

// AddValueIn takes a OpenEdge and connects it to the Value Vertex containing
// val, returning the new Graph which reflects that connection. Any Vertices
// referenced within toe OpenEdge which do not yet exist in the Graph will also
// be created in this step.
func (g *Graph) AddValueIn(oe OpenEdge, val Value) *Graph {
	to := mkVertex(ValueVertex, val)
	toID := to.id

	// if to is already in the graph, pull it out, as it might have existing in
	// edges we want to keep
	if exTo, ok := g.vM[toID]; ok {
		to = exTo
	}

	// if the incoming edge already exists in to then there's nothing to do
	if to.hasOpenEdge(oe) {
		return g
	}

	to = to.cp()
	to.in = append(to.in, oe)
	g = g.cp()

	// starting with to (which we always overwrite) go through vM and
	// recursively add in any vertices which aren't already there
	var persist func(vertex)
	persist = func(v vertex) {
		if v.VertexType == ValueVertex {
			vID := v.id
			if _, ok := g.vM[vID]; !ok {
				g.vM[vID] = v
			}
		} else {
			for _, e := range v.in {
				persist(e.fromV)
			}
		}
	}
	delete(g.vM, toID)
	persist(to)
	for _, e := range to.in {
		persist(e.fromV)
	}

	return g
}

// DelValueIn takes a OpenEdge and disconnects it from the Value Vertex
// containing val, returning the new Graph which reflects the disconnection. If
// the Value Vertex doesn't exist within the graph, or it doesn't have the given
// OpenEdge, no changes are made. Any vertices referenced by toe OpenEdge for
// which that edge is their only outgoing edge will be removed from the Graph.
func (g *Graph) DelValueIn(oe OpenEdge, val Value) *Graph {
	to := mkVertex(ValueVertex, val)
	toID := to.id

	// pull to out of the graph. if it's not there then bail
	var ok bool
	if to, ok = g.vM[toID]; !ok {
		return g
	}

	// get new copy of to without the half-edge, or return if the half-edge
	// wasn't even in to
	to, ok = to.cpAndDelOpenEdge(oe)
	if !ok {
		return g
	}
	g = g.cp()
	g.vM[toID] = to

	// connectedTo returns whether the vertex has any connections with the
	// vertex of the given id, descending recursively
	var connectedTo func(string, vertex) bool
	connectedTo = func(vID string, curr vertex) bool {
		for _, in := range curr.in {
			if in.fromV.VertexType == ValueVertex && in.fromV.id == vID {
				return true
			} else if in.fromV.VertexType == TupleVertex && connectedTo(vID, in.fromV) {
				return true
			}
		}
		return false
	}

	// isOrphaned returns whether the given vertex has any connections to other
	// nodes in the graph
	isOrphaned := func(v vertex) bool {
		vID := v.id
		if v, ok := g.vM[vID]; ok && len(v.in) > 0 {
			return false
		}
		for vID2, v2 := range g.vM {
			if vID2 == vID {
				continue
			} else if connectedTo(vID, v2) {
				return false
			}
		}
		return true
	}

	// if to is orphaned get rid of it
	if isOrphaned(to) {
		delete(g.vM, toID)
	}

	// rmOrphaned descends down the given OpenEdge and removes any Value
	// Vertices referenced in it which are now orphaned
	var rmOrphaned func(OpenEdge)
	rmOrphaned = func(oe OpenEdge) {
		if oe.fromV.VertexType == ValueVertex && isOrphaned(oe.fromV) {
			delete(g.vM, oe.fromV.id)
		} else if oe.fromV.VertexType == TupleVertex {
			for _, juncOe := range oe.fromV.in {
				rmOrphaned(juncOe)
			}
		}
	}
	rmOrphaned(oe)

	return g
}

// Union takes in another Graph and returns a new one which is the union of the
// two. Value vertices which are shared between the two will be merged so that
// the new vertex has the input edges of both.
//
// TODO it bothers me that the opposite of Disjoin is Union and not "Join"
func (g *Graph) Union(g2 *Graph) *Graph {
	g = g.cp()
	for vID, v2 := range g2.vM {
		v, ok := g.vM[vID]
		if !ok {
			v = v2
		} else {
			for _, v2e := range v2.in {
				if !v.hasOpenEdge(v2e) {
					v.in = append(v.in, v2e)
				}
			}
		}
		g.vM[vID] = v
	}
	return g
}

// Disjoin splits the Graph into as many independently connected Graphs as it
// contains. Each Graph returned will have vertices connected only within itself
// and not across to the other Graphs, and the Union of all returned Graphs will
// be the original again.
//
// The order of the Graphs returned is not deterministic. (TODO booooooo)
//
// ZeroGraph.Disjoin() returns empty slice.
func (g *Graph) Disjoin() []*Graph {
	m := map[string]*Graph{}    // maps each id to the Graph it belongs to
	mG := map[*Graph]struct{}{} // tracks unique Graphs created

	var connectedTo func(vertex) *Graph
	connectedTo = func(v vertex) *Graph {
		if v.VertexType == ValueVertex {
			if g := m[v.id]; g != nil {
				return g
			}
		}
		for _, oe := range v.in {
			if g := connectedTo(oe.fromV); g != nil {
				return g
			}
		}
		return nil
	}

	// used upon finding out that previously-thought-to-be disconnected vertices
	// aren't. Merges the two graphs they're connected into one and updates all
	// state internal to this function accordingly.
	rejoin := func(gDst, gSrc *Graph) {
		for id, v := range gSrc.vM {
			gDst.vM[id] = v
			m[id] = gDst
		}
		delete(mG, gSrc)
	}

	var connectTo func(vertex, *Graph)
	connectTo = func(v vertex, g *Graph) {
		if v.VertexType == ValueVertex {
			if g2, ok := m[v.id]; ok && g != g2 {
				rejoin(g, g2)
			}
			m[v.id] = g
		}
		for _, oe := range v.in {
			connectTo(oe.fromV, g)
		}
	}

	for id, v := range g.vM {
		gV := connectedTo(v)

		// if gV is nil it means this vertex is part of a new Graph which
		// nothing else has been connected to yet.
		if gV == nil {
			gV = ZeroGraph.cp()
			mG[gV] = struct{}{}
		}
		gV.vM[id] = v

		// do this no matter what, because we want to descend in to the in edges
		// and mark all of those as being part of this graph too
		connectTo(v, gV)
	}

	gg := make([]*Graph, 0, len(mG))
	for g := range mG {
		gg = append(gg, g)
	}
	return gg
}

////////////////////////////////////////////////////////////////////////////////
// Graph traversal

func (g *Graph) makeView() {
	if g.byVal != nil {
		return
	}

	g.byVal = make(map[string]*Vertex, len(g.vM))
	g.all = map[string]*Vertex{}

	var getV func(vertex, bool) *Vertex
	getV = func(v vertex, top bool) *Vertex {
		V, ok := g.all[v.id]
		if !ok {
			V = &Vertex{ID: v.id, VertexType: v.VertexType, Value: v.val}
			g.all[v.id] = V
		}

		// we can be sure all Value vertices will be called with top==true at
		// some point, so we only need to descend into the input edges if:
		// * top is true
		// * this is a tuple's first time being gotten
		if !top && (ok || v.VertexType != TupleVertex) {
			return V
		}

		V.In = make([]Edge, 0, len(v.in))
		for i := range v.in {
			fromV := getV(v.in[i].fromV, false)
			e := Edge{From: fromV, Value: v.in[i].val, To: V}
			fromV.Out = append(fromV.Out, e)
			V.In = append(V.In, e)
		}

		if v.VertexType == ValueVertex {
			g.byVal[v.val.ID] = V
		}

		return V
	}

	for _, v := range g.vM {
		getV(v, true)
	}
}

// ValueVertex returns the Value Vertex for the given value. If the Graph
// doesn't contain a vertex for the value then nil is returned
func (g *Graph) ValueVertex(val Value) *Vertex {
	g.makeView()
	return g.byVal[val.ID]
}

// ValueVertices returns all Value Vertices in the Graph
func (g *Graph) ValueVertices() []*Vertex {
	g.makeView()
	vv := make([]*Vertex, 0, len(g.byVal))
	for _, v := range g.byVal {
		vv = append(vv, v)
	}
	return vv
}

// Equal returns whether or not the two Graphs are equivalent in value
func Equal(g1, g2 *Graph) bool {
	if len(g1.vM) != len(g2.vM) {
		return false
	}
	for v1ID, v1 := range g1.vM {
		v2, ok := g2.vM[v1ID]
		if !ok {
			return false
		}

		// since the vertices are values we must make sure their input sets are
		// the same (which is tricky since they're unordered, unlike a
		// tuple's)
		if len(v1.in) != len(v2.in) {
			return false
		}
		for _, in := range v1.in {
			if !v2.hasOpenEdge(in) {
				return false
			}
		}
	}
	return true
}

// TODO Walk, but by edge
// TODO Walk, but without end. AKA FSM

// Iter will iterate through the Graph's vertices, calling the callback on every
// Vertex in the Graph once. The vertex order used is non-deterministic.  If the
// callback returns false the iteration is stopped.
func (g *Graph) Iter(callback func(*Vertex) bool) {
	g.makeView()
	if len(g.byVal) == 0 {
		return
	}

	seen := make(map[*Vertex]bool, len(g.byVal))
	var innerWalk func(*Vertex) bool
	innerWalk = func(v *Vertex) bool {
		if seen[v] {
			return true
		} else if !callback(v) {
			return false
		}
		seen[v] = true
		for _, e := range v.In {
			if !innerWalk(e.From) {
				return false
			}
		}
		return true
	}

	for _, v := range g.byVal {
		if !innerWalk(v) {
			return
		}
	}
}

// ByID returns all vertices indexed by their ID field
func (g *Graph) ByID() map[string]*Vertex {
	g.makeView()
	return g.all
}