415 lines
10 KiB
Go
415 lines
10 KiB
Go
|
package seq
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"hash/crc32"
|
||
|
"reflect"
|
||
|
|
||
|
"github.com/mediocregopher/ginger/types"
|
||
|
)
|
||
|
|
||
|
// This is an implementation of a persistent tree, which will then be used as
|
||
|
// the basis for vectors, hash maps, and hash sets.
|
||
|
|
||
|
// Setable is implemented by values that supply their own hashing and equality
// so that they can be stored in a Set. Values of the built-in types handled by
// hash and equal do not need to implement it.
type Setable interface {

	// Hash returns an integer for the value. For two equivalent values (as
	// defined by ==) Hash(i) should always return the same number. For
	// different values of i, Hash should return different numbers if possible;
	// the Set passes a larger i for each level it descends into the tree.
	Hash(uint32) uint32

	// Equal reports whether the receiver is equal to the given arbitrary value
	// found in a Set.
	Equal(types.Elem) bool
}
|
||
|
|
||
|
// Returns an arbitrary integer for the given value/iteration tuple
|
||
|
func hash(v types.Elem, i uint32) uint32 {
|
||
|
switch vt := v.(type) {
|
||
|
|
||
|
case Setable:
|
||
|
return vt.Hash(i) % ARITY
|
||
|
|
||
|
case uint:
|
||
|
return uint32(vt) % ARITY
|
||
|
case uint8:
|
||
|
return uint32(vt) % ARITY
|
||
|
case uint32:
|
||
|
return uint32(vt) % ARITY
|
||
|
case uint64:
|
||
|
return uint32(vt) % ARITY
|
||
|
case int:
|
||
|
return uint32(vt) % ARITY
|
||
|
case int8:
|
||
|
return uint32(vt) % ARITY
|
||
|
case int16:
|
||
|
return uint32(vt) % ARITY
|
||
|
case int32:
|
||
|
return uint32(vt) % ARITY
|
||
|
case int64:
|
||
|
return uint32(vt) % ARITY
|
||
|
case float32:
|
||
|
return uint32(vt) % ARITY
|
||
|
case float64:
|
||
|
return uint32(vt) % ARITY
|
||
|
|
||
|
case string:
|
||
|
return crc32.ChecksumIEEE([]byte(vt)) % ARITY
|
||
|
|
||
|
case []byte:
|
||
|
return crc32.ChecksumIEEE(vt) % ARITY
|
||
|
|
||
|
default:
|
||
|
err := fmt.Sprintf("%s not hashable", reflect.TypeOf(v))
|
||
|
panic(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns whether two values (potentially Setable's) are equivalent
|
||
|
func equal(v1, v2 types.Elem) bool {
|
||
|
if v1t, ok := v1.(Setable); ok {
|
||
|
return v1t.Equal(v2)
|
||
|
} else if v2t, ok := v2.(Setable); ok {
|
||
|
return v2t.Equal(v1)
|
||
|
} else if v1t, ok := v1.([]byte); ok {
|
||
|
if v2t, ok := v2.([]byte); ok {
|
||
|
if len(v1t) != len(v2t) {
|
||
|
return false
|
||
|
}
|
||
|
for i := range v1t {
|
||
|
if v1t[i] != v2t[i] {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
return false
|
||
|
} else {
|
||
|
return v1 == v2
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// ARITY is the number of children each node in a Set (implemented as a hash
// tree) can have. hash reduces its results modulo ARITY, and every kids slice
// is allocated with exactly ARITY entries.
const ARITY = 32
|
||
|
|
||
|
// A Set is an implementation of Seq in the form of a persistent hash-tree. All
// public operations on it return a new, immutable form of the modified
// variable, leaving the old one intact. Immutability is implemented through
// node sharing, so operations aren't actually copying the entire hash-tree
// every time, only the nodes which change, making the implementation very
// efficient compared to just copying.
//
// Items in sets need to be hashable and comparable. This means they either need
// to be some real numeric type (int, float32, etc...), string, []byte, or
// implement the Setable interface.
//
// A nil *Set is a valid, empty Set.
type Set struct {

	// The value being held
	val types.Elem

	// Whether or not the held value has been set yet. Needed because the value
	// could be nil
	full bool

	// Slice of kids of this node. Either nil or of length ARITY, with nil
	// entries for the slots that are not in use
	kids []*Set

	// Number of values in this Set. The public methods only keep this up to
	// date on the root node they return; it is not maintained on nodes further
	// down the tree.
	size uint64
}
|
||
|
|
||
|
// Returns a new Set of the given elements (or no elements, for an empty set)
|
||
|
func NewSet(vals ...types.Elem) *Set {
|
||
|
if len(vals) == 0 {
|
||
|
return nil
|
||
|
}
|
||
|
set := new(Set)
|
||
|
for i := range vals {
|
||
|
set.setValDirty(vals[i], 0)
|
||
|
}
|
||
|
set.size = uint64(len(vals))
|
||
|
return set
|
||
|
}
|
||
|
|
||
|
// Methods marked as "dirty" operate on the node in place, and potentially
|
||
|
// change it or its children.
|
||
|
|
||
|
// Dirty. Tries to set the val on this Set node, or initialize the kids slice if
|
||
|
// it can't. Returns whether or not the value was set and whether or not it was
|
||
|
// already set.
|
||
|
func (set *Set) shallowTrySetOrInit(val types.Elem) (bool, bool) {
|
||
|
if !set.full {
|
||
|
set.val = val
|
||
|
set.full = true
|
||
|
return true, false
|
||
|
} else if equal(set.val, val) {
|
||
|
set.val = val
|
||
|
set.full = true
|
||
|
return true, true
|
||
|
} else if set.kids == nil {
|
||
|
set.kids = make([]*Set, ARITY)
|
||
|
}
|
||
|
return false, false
|
||
|
}
|
||
|
|
||
|
// dirty (obviously). Sets a value on this node in place. Only used during
|
||
|
// initialization.
|
||
|
func (set *Set) setValDirty(val types.Elem, i uint32) {
|
||
|
if ok, _ := set.shallowTrySetOrInit(val); ok {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
h := hash(val, i)
|
||
|
if kid := set.kids[h]; kid != nil {
|
||
|
kid.setValDirty(val, i+1)
|
||
|
} else {
|
||
|
set.kids[h] = NewSet(val)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns a copy of this set node, including allocating and copying the kids
|
||
|
// slice.
|
||
|
func (set *Set) clone() *Set {
|
||
|
var newkids []*Set
|
||
|
if set.kids != nil {
|
||
|
newkids = make([]*Set, ARITY)
|
||
|
copy(newkids, set.kids)
|
||
|
}
|
||
|
cs := &Set{
|
||
|
val: set.val,
|
||
|
full: set.full,
|
||
|
kids: newkids,
|
||
|
size: set.size,
|
||
|
}
|
||
|
return cs
|
||
|
}
|
||
|
|
||
|
// The actual implementation of SetVal, because we need to pass i down the stack
|
||
|
func (set *Set) internalSetVal(val types.Elem, i uint32) (*Set, bool) {
|
||
|
if set == nil {
|
||
|
return NewSet(val), true
|
||
|
}
|
||
|
cset := set.clone()
|
||
|
if ok, prev := cset.shallowTrySetOrInit(val); ok {
|
||
|
return cset, !prev
|
||
|
}
|
||
|
|
||
|
h := hash(val, i)
|
||
|
newkid, ok := cset.kids[h].internalSetVal(val, i+1)
|
||
|
cset.kids[h] = newkid
|
||
|
return cset, ok
|
||
|
}
|
||
|
|
||
|
// Returns a new Set with the given value added to it. Also returns whether or
|
||
|
// not this is the first time setting this value (false if it was already there
|
||
|
// and was overwritten). Completes in O(log(N)) time.
|
||
|
func (set *Set) SetVal(val types.Elem) (*Set, bool) {
|
||
|
nset, ok := set.internalSetVal(val, 0)
|
||
|
if ok {
|
||
|
nset.size++
|
||
|
}
|
||
|
return nset, ok
|
||
|
}
|
||
|
|
||
|
// The actual implementation of DelVal, because we need to pass i down the stack
|
||
|
func (set *Set) internalDelVal(val types.Elem, i uint32) (*Set, bool) {
|
||
|
if set == nil {
|
||
|
return nil, false
|
||
|
} else if set.full && equal(val, set.val) {
|
||
|
cset := set.clone()
|
||
|
cset.val = nil
|
||
|
cset.full = false
|
||
|
return cset, true
|
||
|
} else if set.kids == nil {
|
||
|
return set, false
|
||
|
}
|
||
|
|
||
|
h := hash(val, i)
|
||
|
if newkid, ok := set.kids[h].internalDelVal(val, i+1); ok {
|
||
|
cset := set.clone()
|
||
|
cset.kids[h] = newkid
|
||
|
return cset, true
|
||
|
}
|
||
|
return set, false
|
||
|
}
|
||
|
|
||
|
// Returns a new Set with the given value removed from it and whether or not the
|
||
|
// value was actually removed. Completes in O(log(N)) time.
|
||
|
func (set *Set) DelVal(val types.Elem) (*Set, bool) {
|
||
|
nset, ok := set.internalDelVal(val, 0)
|
||
|
if ok && nset != nil {
|
||
|
nset.size--
|
||
|
}
|
||
|
return nset, ok
|
||
|
}
|
||
|
|
||
|
// The actual implementation of GetVal, because we need to pass i down the stack
|
||
|
func (set *Set) internalGetVal(val types.Elem, i uint32) (types.Elem, bool) {
|
||
|
if set == nil {
|
||
|
return nil, false
|
||
|
} else if set.full && equal(val, set.val) {
|
||
|
return set.val, true
|
||
|
} else if set.kids == nil {
|
||
|
return nil, false
|
||
|
}
|
||
|
|
||
|
h := hash(val, i)
|
||
|
return set.kids[h].internalGetVal(val, i+1)
|
||
|
}
|
||
|
|
||
|
// Returns a value from the Set, along with a boolean indiciating whether or
|
||
|
// not the value was found. Completes in O(log(N)) time.
|
||
|
func (set *Set) GetVal(val types.Elem) (types.Elem, bool) {
|
||
|
return set.internalGetVal(val, 0)
|
||
|
}
|
||
|
|
||
|
// Actual implementation of FirstRest. Because we need it to return a *Set
|
||
|
// instead of Seq for one case.
|
||
|
func (set *Set) internalFirstRest() (types.Elem, *Set, bool) {
|
||
|
if set == nil {
|
||
|
return nil, nil, false
|
||
|
}
|
||
|
|
||
|
if set.kids != nil {
|
||
|
var el types.Elem
|
||
|
var rest *Set
|
||
|
var ok bool
|
||
|
for i := range set.kids {
|
||
|
if el, rest, ok = set.kids[i].internalFirstRest(); ok {
|
||
|
cset := set.clone()
|
||
|
cset.kids[i] = rest
|
||
|
return el, cset, true
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We're not nil, but we don't have a value and no kids had values. We might
|
||
|
// as well be nil.
|
||
|
if !set.full {
|
||
|
return nil, nil, false
|
||
|
}
|
||
|
|
||
|
return set.val, nil, true
|
||
|
}
|
||
|
|
||
|
// Implementation of FirstRest for Seq interface. Completes in O(log(N)) time.
|
||
|
func (set *Set) FirstRest() (types.Elem, Seq, bool) {
|
||
|
el, restSet, ok := set.internalFirstRest()
|
||
|
if ok && restSet != nil {
|
||
|
restSet.size--
|
||
|
}
|
||
|
return el, Seq(restSet), ok
|
||
|
}
|
||
|
|
||
|
// Implementation of String for Stringer interface
|
||
|
func (set *Set) String() string {
|
||
|
return ToString(set, "#{", "}#")
|
||
|
}
|
||
|
|
||
|
// Returns the number of elements in the Set. Completes in O(1) time.
|
||
|
func (set *Set) Size() uint64 {
|
||
|
if set == nil {
|
||
|
return 0
|
||
|
}
|
||
|
return set.size
|
||
|
}
|
||
|
|
||
|
// Returns a Set with all of the elements of the original Set along with
|
||
|
// everything in the given Seq. If an element is present in both the Set and the
|
||
|
// Seq, the element in the Seq overwrites. Completes in O(M*log(N)), with M
|
||
|
// being the number of elements in the Seq and N the number of elements in the
|
||
|
// Set
|
||
|
func (set *Set) Union(s Seq) *Set {
|
||
|
if set == nil {
|
||
|
return ToSet(s)
|
||
|
}
|
||
|
|
||
|
cset := set.clone()
|
||
|
var el types.Elem
|
||
|
var ok bool
|
||
|
for {
|
||
|
if el, s, ok = s.FirstRest(); !ok {
|
||
|
return cset
|
||
|
} else if cset, ok = cset.SetVal(el); ok {
|
||
|
cset.size++
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns a Set with all of the elements in Seq that are also in Set. Completes
|
||
|
// in O(M*log(N)), with M being the number of elements in the Seq and N the
|
||
|
// number of elements in the Set
|
||
|
func (set *Set) Intersection(s Seq) *Set {
|
||
|
if set == nil {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
iset := NewSet()
|
||
|
var el types.Elem
|
||
|
var ok bool
|
||
|
for {
|
||
|
if el, s, ok = s.FirstRest(); !ok {
|
||
|
return iset
|
||
|
} else if _, ok = set.GetVal(el); ok {
|
||
|
iset, _ = iset.SetVal(el)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns a Set of all elements in the original Set that aren't in the Seq.
|
||
|
// Completes in O(M*log(N)), with M being the number of elements in the Seq and
|
||
|
// N the number of elements in the Set
|
||
|
func (set *Set) Difference(s Seq) *Set {
|
||
|
if set == nil {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
cset := set.clone()
|
||
|
var el types.Elem
|
||
|
var ok bool
|
||
|
for {
|
||
|
if el, s, ok = s.FirstRest(); !ok {
|
||
|
return cset
|
||
|
} else {
|
||
|
cset, _ = cset.DelVal(el)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns a Set of all elements that are either in the original Set or the
|
||
|
// given Seq, but not in both. Completes in O(M*log(N)), with M being the number
|
||
|
// of elements in the Seq and N the number of elements in the Set.
|
||
|
func (set *Set) SymDifference(s Seq) *Set {
|
||
|
if set == nil {
|
||
|
return ToSet(s)
|
||
|
}
|
||
|
|
||
|
cset := set.clone()
|
||
|
var cset2 *Set
|
||
|
var el types.Elem
|
||
|
var ok bool
|
||
|
for {
|
||
|
if el, s, ok = s.FirstRest(); !ok {
|
||
|
return cset
|
||
|
} else if cset2, ok = cset.DelVal(el); ok {
|
||
|
cset = cset2
|
||
|
} else {
|
||
|
cset, _ = cset.SetVal(el)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns the elements in the Seq as a set. In general this completes in
|
||
|
// O(N*log(N)) time (I think...). If the given Seq is already a Set it will
|
||
|
// complete in O(1) time. If it is a HashMap it will complete in O(1) time, and
|
||
|
// the resultant Set will be comprised of all KVs
|
||
|
func ToSet(s Seq) *Set {
|
||
|
if set, ok := s.(*Set); ok {
|
||
|
return set
|
||
|
} else if hm, ok := s.(*HashMap); ok {
|
||
|
return hm.set
|
||
|
}
|
||
|
vals := ToSlice(s)
|
||
|
return NewSet(vals...)
|
||
|
}
|