Merge pull request #48 from kortschak/enumerate

Enumerate iterator types
This commit is contained in:
Barak Michener 2014-07-03 14:40:37 -04:00
commit f6fb8d8f67
23 changed files with 158 additions and 69 deletions

View file

@ -14,11 +14,12 @@
package graph
// Define the general iterator interface, as well as the BaseIterator which all
// Define the general iterator interface, as well as the Base iterator which all
// iterators can "inherit" from to get default iterator functionality.
import (
"strings"
"sync"
"github.com/barakmich/glog"
)
@ -96,7 +97,7 @@ type Iterator interface {
// Returns a value identifying what the function of the iterator is. By
// knowing the types of the iterators, we can devise optimization strategies.
Type() string
Type() Type
// Optimizes an iterator. Can replace the iterator, or merely move things
// around internally. if it chooses to replace it with a better iterator,
@ -112,9 +113,11 @@ type Iterator interface {
// Close the iterator and do internal cleanup.
Close()
// UID returns the unique identifier of the iterator.
UID() uintptr
}
// FixedIterator wraps iterators that are modifiable by addition of fixed value sets.
type FixedIterator interface {
Iterator
Add(Value)
@ -126,20 +129,83 @@ type IteratorStats struct {
Size int64
}
// Type enumerates the set of Iterator types.
type Type int

// The built-in iterator types. Invalid is the zero value of Type. Types
// allocated later by Register take the values following Optional.
const (
	Invalid Type = iota
	All
	And
	Or
	HasA
	LinksTo
	Comparison
	Null
	Fixed
	Not
	Optional
)

var (
	// We use a sync.Mutex rather than an RWMutex since the client packages keep
	// the Type that was returned, so the only possibility for contention is at
	// initialization.
	lock sync.Mutex

	// types holds the name of every known Type, indexed by the Type's value.
	// Each built-in entry is keyed by its constant, so the table stays correct
	// even if the const block above is reordered. Register appends to it.
	types = []string{
		Invalid:    "invalid",
		All:        "all",
		And:        "and",
		Or:         "or",
		HasA:       "hasa",
		LinksTo:    "linksto",
		Comparison: "comparison",
		Null:       "null",
		Fixed:      "fixed",
		Not:        "not",
		Optional:   "optional",
	}
)

// Register adds a new iterator type to the set of acceptable types, returning
// the registered Type.
// Calls to Register are idempotent and must be made prior to use of the iterator.
// The conventional approach for use is to include a call to Register in a package
// init() function, saving the Type to a private package var.
func Register(name string) Type {
	lock.Lock()
	defer lock.Unlock()

	// A name that is already known keeps its existing Type; this is what
	// makes repeated registration idempotent.
	for i, known := range types {
		if known == name {
			return Type(i)
		}
	}

	// Otherwise the new name takes the next free slot at the end of the table.
	types = append(types, name)
	return Type(len(types) - 1)
}

// String returns a string representation of the Type: its built-in or
// registered name, or "illegal-type" when t is outside the known range.
// The name table is read without taking the lock.
func (t Type) String() string {
	if t < 0 || int(t) >= len(types) {
		return "illegal-type"
	}
	return types[t]
}
// Utility logging functions that record calls to an iterator's Next and Check
// methods, as well as what they return. Highly useful for tracing the execution path of a query.
func CheckLogIn(it Iterator, val Value) {
if glog.V(4) {
glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type().String()), it.UID(), val)
}
}
func CheckLogOut(it Iterator, val Value, good bool) bool {
if glog.V(4) {
if good {
glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val)
} else {
glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type()), it.UID(), val)
glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val)
}
}
return good
@ -147,16 +213,16 @@ func CheckLogOut(it Iterator, val Value, good bool) bool {
func NextLogIn(it Iterator) {
if glog.V(4) {
glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type()), it.UID())
glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type().String()), it.UID())
}
}
func NextLogOut(it Iterator, val Value, ok bool) (Value, bool) {
if glog.V(4) {
if ok {
glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type()), it.UID(), val)
glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type().String()), it.UID(), val)
} else {
glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type()), it.UID())
glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type().String()), it.UID())
}
}
return val, ok

View file

@ -101,7 +101,7 @@ func (it *Int64) Check(tsv graph.Value) bool {
// The type of this iterator is an "all". This is important, as it puts it in
// the class of "all" iterators.
func (it *Int64) Type() string { return "all" }
func (it *Int64) Type() graph.Type { return graph.All }
// There's nothing to optimize about this little iterator.
func (it *Int64) Optimize() (graph.Iterator, bool) { return it, false }

View file

@ -246,4 +246,4 @@ func (it *And) Close() {
}
// Register this as an "and" iterator.
func (it *And) Type() string { return "and" }
func (it *And) Type() graph.Type { return graph.And }

View file

@ -134,7 +134,7 @@ func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator {
func optimizeOrder(its []graph.Iterator) []graph.Iterator {
var (
// bad contains iterators that can't be (efficiently) nexted, such as
// "optional" or "not". Separate them out and tack them on at the end.
// graph.Optional or graph.Not. Separate them out and tack them on at the end.
out, bad []graph.Iterator
best graph.Iterator
bestCost = int64(1 << 62)
@ -257,25 +257,25 @@ func optimizeSubIterators(its []graph.Iterator) []graph.Iterator {
// Check a list of iterators for any Null iterators.
func hasAnyNullIterators(its []graph.Iterator) bool {
for _, it := range its {
if it.Type() == "null" {
if it.Type() == graph.Null {
return true
}
}
return false
}
// There are two "not-useful" iterators -- namely "null" which returns
// nothing, and "all" which returns everything. Particularly, we want
// to see if we're intersecting with a bunch of "all" iterators, and,
// There are two "not-useful" iterators -- namely graph.Null which returns
// nothing, and graph.All which returns everything. Particularly, we want
// to see if we're intersecting with a bunch of graph.All iterators, and,
// if we are, then we have only one useful iterator.
func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
usefulCount := 0
var usefulIt graph.Iterator
for _, it := range its {
switch it.Type() {
case "null", "all":
case graph.Null, graph.All:
continue
case "optional":
case graph.Optional:
// Optional is weird -- it's not useful, but we can't optimize
// away from it. Therefore, we skip this optimization
// if we see one.

View file

@ -21,6 +21,8 @@ import (
"reflect"
"sort"
"testing"
"github.com/google/cayley/graph"
)
func TestIteratorPromotion(t *testing.T) {
@ -37,7 +39,7 @@ func TestIteratorPromotion(t *testing.T) {
if !changed {
t.Error("Iterator didn't optimize")
}
if newIt.Type() != "fixed" {
if newIt.Type() != graph.Fixed {
t.Error("Expected fixed iterator")
}
tagsExpected := []string{"a", "b", "c"}
@ -58,7 +60,7 @@ func TestNullIteratorAnd(t *testing.T) {
if !changed {
t.Error("Didn't change")
}
if newIt.Type() != "null" {
if newIt.Type() != graph.Null {
t.Error("Expected null iterator, got ", newIt.Type())
}
}

View file

@ -99,9 +99,7 @@ func (it *Fixed) DebugString(indent int) string {
}
// Register this iterator as a Fixed iterator.
func (it *Fixed) Type() string {
return "fixed"
}
func (it *Fixed) Type() graph.Type { return graph.Fixed }
// Check if the passed value is equal to one of the values stored in the iterator.
func (it *Fixed) Check(v graph.Value) bool {

View file

@ -91,7 +91,7 @@ func (it *HasA) Optimize() (graph.Iterator, bool) {
newPrimary, changed := it.primaryIt.Optimize()
if changed {
it.primaryIt = newPrimary
if it.primaryIt.Type() == "null" {
if it.primaryIt.Type() == graph.Null {
return it.primaryIt, true
}
}
@ -220,4 +220,4 @@ func (it *HasA) Close() {
}
// Register this iterator as a HasA.
func (it *HasA) Type() string { return "hasa" }
func (it *HasA) Type() graph.Type { return graph.HasA }

View file

@ -174,7 +174,7 @@ func NewNull() *Null {
func (it *Null) Clone() graph.Iterator { return NewNull() }
// Name the null iterator.
func (it *Null) Type() string { return "null" }
func (it *Null) Type() graph.Type { return graph.Null }
// A good iterator will close itself when it returns true.
// Null has nothing it needs to do.

View file

@ -118,7 +118,7 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) {
newPrimary, changed := it.primaryIt.Optimize()
if changed {
it.primaryIt = newPrimary
if it.primaryIt.Type() == "null" {
if it.primaryIt.Type() == graph.Null {
it.nextIt.Close()
return it.primaryIt, true
}
@ -166,7 +166,7 @@ func (it *LinksTo) NextResult() bool {
}
// Register the LinksTo.
func (it *LinksTo) Type() string { return "linksto" }
func (it *LinksTo) Type() graph.Type { return graph.LinksTo }
// Return a guess as to how big or costly it is to next the iterator.
func (it *LinksTo) Stats() graph.IteratorStats {

View file

@ -104,7 +104,7 @@ func (it *Optional) TagResults(dst map[string]graph.Value) {
}
// Registers the optional iterator.
func (it *Optional) Type() string { return "optional" }
func (it *Optional) Type() graph.Type { return graph.Optional }
// Prints the optional and its subiterator.
func (it *Optional) DebugString(indent int) string {

View file

@ -281,4 +281,4 @@ func (it *Or) Stats() graph.IteratorStats {
}
// Register this as an "or" iterator.
func (it *Or) Type() string { return "or" }
func (it *Or) Type() graph.Type { return graph.Or }

View file

@ -115,18 +115,18 @@ func (qs *queryShape) MakeNode(it graph.Iterator) *Node {
}
switch it.Type() {
case "and":
case graph.And:
for _, sub := range it.SubIterators() {
qs.nodeId++
newNode := qs.MakeNode(sub)
if sub.Type() != "or" {
if sub.Type() != graph.Or {
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
qs.AddLink(&Link{n.Id, newNode.Id, 0, 0})
}
}
case "fixed":
case graph.Fixed:
n.IsFixed = true
for {
val, more := it.Next()
@ -135,25 +135,25 @@ func (qs *queryShape) MakeNode(it graph.Iterator) *Node {
}
n.Values = append(n.Values, qs.ts.NameOf(val))
}
case "hasa":
case graph.HasA:
hasa := it.(*HasA)
qs.PushHasa(n.Id, hasa.dir)
qs.nodeId++
newNode := qs.MakeNode(hasa.primaryIt)
qs.AddNode(newNode)
qs.RemoveHasa()
case "or":
case graph.Or:
for _, sub := range it.SubIterators() {
qs.nodeId++
newNode := qs.MakeNode(sub)
if sub.Type() == "or" {
if sub.Type() == graph.Or {
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
qs.AddLink(&Link{n.Id, newNode.Id, 0, 0})
}
}
case "linksto":
case graph.LinksTo:
n.IsLinkNode = true
lto := it.(*LinksTo)
qs.nodeId++
@ -167,15 +167,15 @@ func (qs *queryShape) MakeNode(it graph.Iterator) *Node {
} else {
qs.AddLink(&Link{newNode.Id, hasaID, 0, n.Id})
}
} else if lto.primaryIt.Type() == "fixed" {
} else if lto.primaryIt.Type() == graph.Fixed {
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
}
case "optional":
case graph.Optional:
// Unsupported, for the moment
fallthrough
case "all":
case graph.All:
}
return &n
}

View file

@ -162,7 +162,7 @@ func (it *Comparison) TagResults(dst map[string]graph.Value) {
}
// Registers the value-comparison iterator.
func (it *Comparison) Type() string { return "value-comparison" }
func (it *Comparison) Type() graph.Type { return graph.Comparison }
// Prints the value-comparison and its subiterator.
func (it *Comparison) DebugString(indent int) string {

View file

@ -118,8 +118,8 @@ func (it *AllIterator) DebugString(indent int) string {
return fmt.Sprintf("%s(%s tags: %v leveldb size:%d %s %p)", strings.Repeat(" ", indent), it.Type(), it.Tags(), size, it.dir, it)
}
func (it *AllIterator) Type() string { return "all" }
func (it *AllIterator) Sorted() bool { return false }
func (it *AllIterator) Type() graph.Type { return graph.All }
func (it *AllIterator) Sorted() bool { return false }
func (it *AllIterator) Optimize() (graph.Iterator, bool) {
return it, false

View file

@ -195,8 +195,16 @@ func (it *Iterator) DebugString(indent int) string {
return fmt.Sprintf("%s(%s %d tags: %v dir: %s size:%d %s)", strings.Repeat(" ", indent), it.Type(), it.UID(), it.Tags(), it.dir, size, it.ts.NameOf(it.checkId))
}
func (it *Iterator) Type() string { return "leveldb" }
func (it *Iterator) Sorted() bool { return false }
// levelDBType holds the graph.Type returned by registering the name "leveldb".
// It is assigned once, in init.
var levelDBType graph.Type
// init registers the "leveldb" iterator name with the graph package and keeps
// the Type that graph.Register returns.
func init() {
levelDBType = graph.Register("leveldb")
}
// Type returns the graph.Type that was registered under the name "leveldb".
func Type() graph.Type { return levelDBType }
func (it *Iterator) Type() graph.Type { return levelDBType }
func (it *Iterator) Sorted() bool { return false }
func (it *Iterator) Optimize() (graph.Iterator, bool) {
return it, false

View file

@ -160,7 +160,7 @@ func TestIterator(t *testing.T) {
size, accurate := it.Size()
So(size, ShouldBeBetween, 0, 20)
So(accurate, ShouldBeFalse)
So(it.Type(), ShouldEqual, "all")
So(it.Type(), ShouldEqual, graph.All)
re_it, ok := it.Optimize()
So(ok, ShouldBeFalse)
So(re_it, ShouldPointTo, it)
@ -209,7 +209,7 @@ func TestIterator(t *testing.T) {
size, accurate := it.Size()
So(size, ShouldBeBetween, 0, 20)
So(accurate, ShouldBeFalse)
So(it.Type(), ShouldEqual, "all")
So(it.Type(), ShouldEqual, graph.All)
re_it, ok := it.Optimize()
So(ok, ShouldBeFalse)
So(re_it, ShouldPointTo, it)
@ -407,7 +407,7 @@ func TestOptimize(t *testing.T) {
oldIt := lto.Clone()
newIt, ok := lto.Optimize()
So(ok, ShouldBeTrue)
So(newIt.Type(), ShouldEqual, "leveldb")
So(newIt.Type(), ShouldEqual, Type())
Convey("Containing the right things", func() {
afterOp := extractTripleFromIterator(ts, newIt)

View file

@ -21,7 +21,7 @@ import (
func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
switch it.Type() {
case "linksto":
case graph.LinksTo:
return ts.optimizeLinksTo(it.(*iterator.LinksTo))
}
@ -34,7 +34,7 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo
return it, false
}
primary := subs[0]
if primary.Type() == "fixed" {
if primary.Type() == graph.Fixed {
size, _ := primary.Size()
if size == 1 {
val, ok := primary.Next()

View file

@ -101,12 +101,18 @@ func (it *Iterator) DebugString(indent int) string {
return fmt.Sprintf("%s(%s tags:%s size:%d %s)", strings.Repeat(" ", indent), it.Type(), it.Tags(), size, it.data)
}
func (it *Iterator) Type() string {
return "llrb"
}
func (it *Iterator) Sorted() bool {
return true
// memType holds the graph.Type returned by registering the name "llrb".
// It is assigned once, in init.
var memType graph.Type
// init registers the "llrb" iterator name with the graph package and keeps
// the Type that graph.Register returns.
func init() {
memType = graph.Register("llrb")
}
// Type returns the graph.Type that was registered under the name "llrb".
func Type() graph.Type { return memType }
func (it *Iterator) Type() graph.Type { return memType }
func (it *Iterator) Sorted() bool { return true }
func (it *Iterator) Optimize() (graph.Iterator, bool) {
return it, false
}

View file

@ -21,7 +21,7 @@ import (
func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
switch it.Type() {
case "linksto":
case graph.LinksTo:
return ts.optimizeLinksTo(it.(*iterator.LinksTo))
}
@ -34,7 +34,7 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo
return it, false
}
primary := subs[0]
if primary.Type() == "fixed" {
if primary.Type() == graph.Fixed {
size, _ := primary.Size()
if size == 1 {
val, ok := primary.Next()

View file

@ -105,7 +105,7 @@ func TestLinksToOptimization(t *testing.T) {
if !changed {
t.Error("Iterator didn't change")
}
if newIt.Type() != "llrb" {
if newIt.Type() != Type() {
t.Fatal("Didn't swap out to LLRB")
}
v := newIt.(*Iterator)

View file

@ -157,12 +157,21 @@ func (it *Iterator) Size() (int64, bool) {
return it.size, true
}
func (it *Iterator) Type() string {
if it.isAll {
return "all"
}
return "mongo"
// mongoType holds the graph.Type returned by registering the name "mongo".
// It is assigned once, in init.
var mongoType graph.Type
// init registers the "mongo" iterator name with the graph package and keeps
// the Type that graph.Register returns.
func init() {
mongoType = graph.Register("mongo")
}
// Type returns the graph.Type that was registered under the name "mongo".
func Type() graph.Type { return mongoType }
func (it *Iterator) Type() graph.Type {
if it.isAll {
return graph.All
}
return mongoType
}
func (it *Iterator) Sorted() bool { return true }
func (it *Iterator) Optimize() (graph.Iterator, bool) { return it, false }

View file

@ -21,7 +21,7 @@ import (
func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
switch it.Type() {
case "linksto":
case graph.LinksTo:
return ts.optimizeLinksTo(it.(*iterator.LinksTo))
}
@ -34,7 +34,7 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo
return it, false
}
primary := subs[0]
if primary.Type() == "fixed" {
if primary.Type() == graph.Fixed {
size, _ := primary.Size()
if size == 1 {
val, ok := primary.Next()

View file

@ -36,7 +36,7 @@ func TestParseSexpWithMemstore(t *testing.T) {
Convey("It should parse an empty query", func() {
it := BuildIteratorTreeForQuery(ts, "()")
So(it.Type(), ShouldEqual, "null")
So(it.Type(), ShouldEqual, graph.Null)
})
Convey("It should get a single triple linkage", func() {
@ -44,7 +44,7 @@ func TestParseSexpWithMemstore(t *testing.T) {
query := "($a (:can \"win\"))"
So(len(query), ShouldEqual, 17)
it := BuildIteratorTreeForQuery(ts, query)
So(it.Type(), ShouldEqual, "and")
So(it.Type(), ShouldEqual, graph.And)
out, ok := it.Next()
So(ok, ShouldBeTrue)
So(out, ShouldEqual, ts.ValueOf("i"))
@ -54,7 +54,7 @@ func TestParseSexpWithMemstore(t *testing.T) {
ts.AddTriple(&graph.Triple{"i", "can", "win", ""})
query := "(\"i\" (:can $a))"
it := BuildIteratorTreeForQuery(ts, query)
So(it.Type(), ShouldEqual, "and")
So(it.Type(), ShouldEqual, graph.And)
out, ok := it.Next()
So(ok, ShouldBeTrue)
So(out, ShouldEqual, ts.ValueOf("i"))
@ -71,7 +71,7 @@ func TestTreeConstraintParse(t *testing.T) {
"(:like\n" +
"($a (:is :good))))"
it := BuildIteratorTreeForQuery(ts, query)
if it.Type() != "and" {
if it.Type() != graph.And {
t.Error("Odd iterator tree. Got: %s", it.DebugString(0))
}
out, ok := it.Next()
@ -112,7 +112,7 @@ func TestMultipleConstraintParse(t *testing.T) {
"(:like :beer)\n" +
"(:like \"food\"))"
it := BuildIteratorTreeForQuery(ts, query)
if it.Type() != "and" {
if it.Type() != graph.And {
t.Error("Odd iterator tree. Got: %s", it.DebugString(0))
}
out, ok := it.Next()