diff --git a/graph/bolt/bolt_test.go b/graph/bolt/bolt_test.go index 986550b..e72d514 100644 --- a/graph/bolt/bolt_test.go +++ b/graph/bolt/bolt_test.go @@ -334,7 +334,7 @@ func TestSetIterator(t *testing.T) { } it.Reset() - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(qs.QuadsAllIterator()) and.AddSubIterator(it) @@ -354,7 +354,7 @@ func TestSetIterator(t *testing.T) { t.Errorf("Failed to get expected results, got:%v expect:%v", got, expect) } - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) and.AddSubIterator(it) @@ -393,7 +393,7 @@ func TestSetIterator(t *testing.T) { it.Reset() // Order is important - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) and.AddSubIterator(it) @@ -406,7 +406,7 @@ func TestSetIterator(t *testing.T) { it.Reset() // Order is important - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(it) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) diff --git a/graph/gaedatastore/quadstore_test.go b/graph/gaedatastore/quadstore_test.go index ea9d2b0..3b68170 100644 --- a/graph/gaedatastore/quadstore_test.go +++ b/graph/gaedatastore/quadstore_test.go @@ -282,12 +282,12 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { all := qs.NodesAllIterator() - innerAnd := iterator.NewAnd() + innerAnd := iterator.NewAnd(qs) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate)) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object)) hasa := iterator.NewHasA(qs, innerAnd, quad.Subject) - outerAnd := iterator.NewAnd() + outerAnd := iterator.NewAnd(qs) outerAnd.AddSubIterator(fixed) outerAnd.AddSubIterator(hasa) diff --git a/graph/iterator.go b/graph/iterator.go index 281ea64..17bcec0 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -30,6 +30,18 @@ type Tagger struct { fixedTags map[string]Value } +// TODO(barakmich): Linkage is general enough that there are places we take +//the combined arguments `quad.Direction, graph.Value` that it may be worth +//converting these into Linkages. If nothing else, future indexed iterators may +//benefit from the shared representation + +// Linkage is a union type representing a set of values established for a given +// quad direction. +type Linkage struct { + Dir quad.Direction + Values []Value +} + // Add a tag to the iterator. func (t *Tagger) Add(tag string) { t.tags = append(t.tags, tag) diff --git a/graph/iterator/and_iterator.go b/graph/iterator/and_iterator.go index 418a469..f315878 100644 --- a/graph/iterator/and_iterator.go +++ b/graph/iterator/and_iterator.go @@ -31,13 +31,16 @@ type And struct { result graph.Value runstats graph.IteratorStats err error + qs graph.QuadStore } -// Creates a new And iterator. -func NewAnd() *And { +// NewAnd creates an And iterator. `qs` is only required when needing a handle +// for QuadStore-specific optimizations, otherwise nil is acceptable. +func NewAnd(qs graph.QuadStore) *And { return &And{ uid: NextUID(), internalIterators: make([]graph.Iterator, 0, 20), + qs: qs, } } @@ -79,7 +82,7 @@ func (it *And) TagResults(dst map[string]graph.Value) { } func (it *And) Clone() graph.Iterator { - and := NewAnd() + and := NewAnd(it.qs) and.AddSubIterator(it.primaryIt.Clone()) and.tags.CopyFrom(it) for _, sub := range it.internalIterators { diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index 276774e..05dc85d 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -78,7 +78,7 @@ func (it *And) Optimize() (graph.Iterator, bool) { // The easiest thing to do at this point is merely to create a new And iterator // and replace ourselves with our (reordered, optimized) clone. - newAnd := NewAnd() + newAnd := NewAnd(it.qs) // Add the subiterators in order. for _, sub := range its { @@ -95,6 +95,18 @@ func (it *And) Optimize() (graph.Iterator, bool) { // the new And (they were unchanged upon calling Optimize() on them, at the // start). it.cleanUp() + + // Ask the graph.QuadStore if we can be replaced. Often times, this is a great + // optimization opportunity (there's a fixed iterator underneath us, for + // example). + if it.qs != nil { + newReplacement, hasOne := it.qs.OptimizeIterator(newAnd) + if hasOne { + newAnd.Close() + return newReplacement, true + } + } + return newAnd, true } diff --git a/graph/iterator/and_iterator_optimize_test.go b/graph/iterator/and_iterator_optimize_test.go index 0ae0bc5..13ede49 100644 --- a/graph/iterator/and_iterator_optimize_test.go +++ b/graph/iterator/and_iterator_optimize_test.go @@ -26,10 +26,14 @@ import ( ) func TestIteratorPromotion(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } all := NewInt64(1, 3) fixed := NewFixed(Identity) fixed.Add(3) - a := NewAnd() + a := NewAnd(qs) a.AddSubIterator(all) a.AddSubIterator(fixed) all.Tagger().Add("a") @@ -51,9 +55,13 @@ func TestIteratorPromotion(t *testing.T) { } func TestNullIteratorAnd(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } all := NewInt64(1, 3) null := NewNull() - a := NewAnd() + a := NewAnd(qs) a.AddSubIterator(all) a.AddSubIterator(null) newIt, changed := a.Optimize() @@ -66,11 +74,15 @@ func TestNullIteratorAnd(t *testing.T) { } func TestReorderWithTag(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } all := NewInt64(100, 300) all.Tagger().Add("good") all2 := NewInt64(1, 30000) all2.Tagger().Add("slow") - a := NewAnd() + a := NewAnd(qs) // Make all2 the default iterator a.AddSubIterator(all2) a.AddSubIterator(all) @@ -92,11 +104,15 @@ func TestReorderWithTag(t *testing.T) { } func TestAndStatistics(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } all := NewInt64(100, 300) all.Tagger().Add("good") all2 := NewInt64(1, 30000) all2.Tagger().Add("slow") - a := NewAnd() + a := NewAnd(qs) // Make all2 the default iterator a.AddSubIterator(all2) a.AddSubIterator(all) diff --git a/graph/iterator/and_iterator_test.go b/graph/iterator/and_iterator_test.go index 1a9c131..0d603d6 100644 --- a/graph/iterator/and_iterator_test.go +++ b/graph/iterator/and_iterator_test.go @@ -23,10 +23,14 @@ import ( // Make sure that tags work on the And. func TestTag(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } fix1 := NewFixed(Identity) fix1.Add(234) fix1.Tagger().Add("foo") - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(fix1) and.Tagger().Add("bar") out := fix1.Tagger().Tags() @@ -56,6 +60,10 @@ func TestTag(t *testing.T) { // Do a simple itersection of fixed values. func TestAndAndFixedIterators(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } fix1 := NewFixed(Identity) fix1.Add(1) fix1.Add(2) @@ -65,7 +73,7 @@ func TestAndAndFixedIterators(t *testing.T) { fix2.Add(3) fix2.Add(4) fix2.Add(5) - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(fix1) and.AddSubIterator(fix2) // Should be as big as smallest subiterator @@ -94,6 +102,10 @@ func TestAndAndFixedIterators(t *testing.T) { // If there's no intersection, the size should still report the same, // but there should be nothing to Next() func TestNonOverlappingFixedIterators(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } fix1 := NewFixed(Identity) fix1.Add(1) fix1.Add(2) @@ -103,7 +115,7 @@ func TestNonOverlappingFixedIterators(t *testing.T) { fix2.Add(5) fix2.Add(6) fix2.Add(7) - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(fix1) and.AddSubIterator(fix2) // Should be as big as smallest subiterator @@ -122,9 +134,13 @@ func TestNonOverlappingFixedIterators(t *testing.T) { } func TestAllIterators(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } all1 := NewInt64(1, 5) all2 := NewInt64(4, 10) - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(all2) and.AddSubIterator(all1) @@ -142,10 +158,14 @@ func TestAllIterators(t *testing.T) { } func TestAndIteratorErr(t *testing.T) { + qs := &store{ + data: []string{}, + iter: NewFixed(Identity), + } wantErr := errors.New("unique") allErr := newTestIterator(false, wantErr) - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(allErr) and.AddSubIterator(NewInt64(1, 5)) diff --git a/graph/iterator/query_shape_test.go b/graph/iterator/query_shape_test.go index e041e7e..2fe38b8 100644 --- a/graph/iterator/query_shape_test.go +++ b/graph/iterator/query_shape_test.go @@ -23,7 +23,7 @@ import ( ) func hasaWithTag(qs graph.QuadStore, tag string, target string) *HasA { - and := NewAnd() + and := NewAnd(qs) obj := qs.FixedIterator() obj.Add(qs.ValueOf(target)) @@ -91,7 +91,7 @@ func TestQueryShape(t *testing.T) { } // Given a name-of-an-and-iterator's shape. - andInternal := NewAnd() + andInternal := NewAnd(qs) hasa1 := hasaWithTag(qs, "tag1", "cool") hasa1.Tagger().Add("hasa1") @@ -104,7 +104,7 @@ func TestQueryShape(t *testing.T) { pred := qs.FixedIterator() pred.Add(qs.ValueOf("name")) - and := NewAnd() + and := NewAnd(qs) and.AddSubIterator(NewLinksTo(qs, andInternal, quad.Subject)) and.AddSubIterator(NewLinksTo(qs, pred, quad.Predicate)) diff --git a/graph/leveldb/leveldb_test.go b/graph/leveldb/leveldb_test.go index 292b7dd..c63bbe3 100644 --- a/graph/leveldb/leveldb_test.go +++ b/graph/leveldb/leveldb_test.go @@ -333,7 +333,7 @@ func TestSetIterator(t *testing.T) { } it.Reset() - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(qs.QuadsAllIterator()) and.AddSubIterator(it) @@ -353,7 +353,7 @@ func TestSetIterator(t *testing.T) { t.Errorf("Failed to get expected results, got:%v expect:%v", got, expect) } - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) and.AddSubIterator(it) @@ -392,7 +392,7 @@ func TestSetIterator(t *testing.T) { it.Reset() // Order is important - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) and.AddSubIterator(it) @@ -405,7 +405,7 @@ func TestSetIterator(t *testing.T) { it.Reset() // Order is important - and = iterator.NewAnd() + and = iterator.NewAnd(qs) and.AddSubIterator(it) and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B"))) diff --git a/graph/memstore/quadstore_test.go b/graph/memstore/quadstore_test.go index 020ca5d..5bcf1d3 100644 --- a/graph/memstore/quadstore_test.go +++ b/graph/memstore/quadstore_test.go @@ -108,12 +108,12 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { all := qs.NodesAllIterator() - innerAnd := iterator.NewAnd() + innerAnd := iterator.NewAnd(qs) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate)) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object)) hasa := iterator.NewHasA(qs, innerAnd, quad.Subject) - outerAnd := iterator.NewAnd() + outerAnd := iterator.NewAnd(qs) outerAnd.AddSubIterator(fixed) outerAnd.AddSubIterator(hasa) @@ -193,7 +193,7 @@ func TestRemoveQuad(t *testing.T) { fixed2 := qs.FixedIterator() fixed2.Add(qs.ValueOf("follows")) - innerAnd := iterator.NewAnd() + innerAnd := iterator.NewAnd(qs) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed, quad.Subject)) innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate)) diff --git a/graph/mongo/indexed_linksto.go b/graph/mongo/indexed_linksto.go new file mode 100644 index 0000000..3830390 --- /dev/null +++ b/graph/mongo/indexed_linksto.go @@ -0,0 +1,283 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongo + +import ( + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" + + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +var _ graph.Nexter = &LinksTo{} + +var linksToType graph.Type + +func init() { + linksToType = graph.RegisterIterator("mongo-linksto") +} + +// LinksTo is a MongoDB-dependent version of a LinksTo iterator. Like the normal +// LinksTo, it represents a set of links to a set of nodes, represented by its +// subiterator. However, this iterator may often be faster than the generic +// LinksTo, as it can use the secondary indices in Mongo as features within the +// Mongo query, reducing the size of the result set and speeding up iteration. +type LinksTo struct { + uid uint64 + collection string + tags graph.Tagger + qs *QuadStore + primaryIt graph.Iterator + dir quad.Direction + nextIt *mgo.Iter + result graph.Value + runstats graph.IteratorStats + lset []graph.Linkage + err error +} + +// NewLinksTo constructs a new indexed LinksTo iterator for Mongo around a direction +// and a subiterator of nodes. +func NewLinksTo(qs *QuadStore, it graph.Iterator, collection string, d quad.Direction, lset []graph.Linkage) *LinksTo { + return &LinksTo{ + uid: iterator.NextUID(), + qs: qs, + primaryIt: it, + dir: d, + nextIt: nil, + lset: lset, + collection: collection, + } +} + +func (it *LinksTo) buildConstraint() bson.M { + constraint := bson.M{} + for _, set := range it.lset { + var s []string + for _, v := range set.Values { + s = append(s, it.qs.NameOf(v)) + } + constraint[set.Dir.String()] = bson.M{"$in": s} + if len(s) == 1 { + constraint[set.Dir.String()] = s[0] + } + } + return constraint +} + +func (it *LinksTo) buildIteratorFor(d quad.Direction, val graph.Value) *mgo.Iter { + name := it.qs.NameOf(val) + constraint := it.buildConstraint() + constraint[d.String()] = name + return it.qs.db.C(it.collection).Find(constraint).Iter() +} + +func (it *LinksTo) UID() uint64 { + return it.uid +} + +func (it *LinksTo) Tagger() *graph.Tagger { + return &it.tags +} + +// Return the direction under consideration. +func (it *LinksTo) Direction() quad.Direction { return it.dir } + +// Tag these results, and our subiterator's results. +func (it *LinksTo) TagResults(dst map[string]graph.Value) { + for _, tag := range it.tags.Tags() { + dst[tag] = it.Result() + } + + for tag, value := range it.tags.Fixed() { + dst[tag] = value + } + + it.primaryIt.TagResults(dst) +} + +// DEPRECATED +func (it *LinksTo) ResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.Result()) + tree.AddSubtree(it.primaryIt.ResultTree()) + return tree +} + +// Optimize the LinksTo, by replacing it if it can be. +func (it *LinksTo) Optimize() (graph.Iterator, bool) { + return it, false +} + +func (it *LinksTo) Next() bool { + var result struct { + ID string `bson:"_id"` + Added []int64 `bson:"Added"` + Deleted []int64 `bson:"Deleted"` + } + graph.NextLogIn(it) + it.runstats.Next += 1 + if it.nextIt != nil && it.nextIt.Next(&result) { + it.runstats.ContainsNext += 1 + if it.collection == "quads" && len(result.Added) <= len(result.Deleted) { + return it.Next() + } + it.result = result.ID + return graph.NextLogOut(it, it.result, true) + } + + if it.nextIt != nil { + // If there's an error in the 'next' iterator, we save it and we're done. + it.err = it.nextIt.Err() + if it.err != nil { + return false + } + + } + // Subiterator is empty, get another one + if !graph.Next(it.primaryIt) { + // Possibly save error + it.err = it.primaryIt.Err() + + // We're out of nodes in our subiterator, so we're done as well. + return graph.NextLogOut(it, 0, false) + } + if it.nextIt != nil { + it.nextIt.Close() + } + it.nextIt = it.buildIteratorFor(it.dir, it.primaryIt.Result()) + + // Recurse -- return the first in the next set. + return it.Next() +} + +func (it *LinksTo) Err() error { + return it.err +} + +func (it *LinksTo) Result() graph.Value { + return it.result +} + +func (it *LinksTo) Close() error { + var err error + if it.nextIt != nil { + err = it.nextIt.Close() + } + + _err := it.primaryIt.Close() + if _err != nil && err == nil { + err = _err + } + + return err +} + +func (it *LinksTo) NextPath() bool { + ok := it.primaryIt.NextPath() + if !ok { + it.err = it.primaryIt.Err() + } + return ok +} + +func (it *LinksTo) Type() graph.Type { + return linksToType +} + +func (it *LinksTo) Clone() graph.Iterator { + m := NewLinksTo(it.qs, it.primaryIt.Clone(), it.collection, it.dir, it.lset) + m.tags.CopyFrom(it) + return m +} + +func (it *LinksTo) Contains(val graph.Value) bool { + graph.ContainsLogIn(it, val) + it.runstats.Contains += 1 + + for _, set := range it.lset { + dval := it.qs.QuadDirection(val, set.Dir) + good := false + for _, val := range set.Values { + if val == dval { + good = true + break + } + } + if !good { + return graph.ContainsLogOut(it, val, false) + } + } + + node := it.qs.QuadDirection(val, it.dir) + if it.primaryIt.Contains(node) { + it.result = val + return graph.ContainsLogOut(it, val, true) + } + it.err = it.primaryIt.Err() + return graph.ContainsLogOut(it, val, false) +} + +func (it *LinksTo) Describe() graph.Description { + primary := it.primaryIt.Describe() + return graph.Description{ + UID: it.UID(), + Type: it.Type(), + Direction: it.dir, + Iterator: &primary, + } +} + +func (it *LinksTo) Reset() { + it.primaryIt.Reset() + if it.nextIt != nil { + it.nextIt.Close() + } + it.nextIt = nil +} + +// Return a guess as to how big or costly it is to next the iterator. +func (it *LinksTo) Stats() graph.IteratorStats { + subitStats := it.primaryIt.Stats() + // TODO(barakmich): These should really come from the quadstore itself + fanoutFactor := int64(20) + checkConstant := int64(1) + nextConstant := int64(2) + + size := fanoutFactor * subitStats.Size + csize, _ := it.qs.getSize(it.collection, it.buildConstraint()) + if size > csize { + size = csize + } + + return graph.IteratorStats{ + NextCost: nextConstant + subitStats.NextCost, + ContainsCost: checkConstant + subitStats.ContainsCost, + Size: size, + Next: it.runstats.Next, + Contains: it.runstats.Contains, + ContainsNext: it.runstats.ContainsNext, + } +} + +func (it *LinksTo) Size() (int64, bool) { + return it.Stats().Size, false +} + +// Return a list containing only our subiterator. +func (it *LinksTo) SubIterators() []graph.Iterator { + return []graph.Iterator{it.primaryIt} +} diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index 6409901..5362971 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -47,13 +47,6 @@ func NewIterator(qs *QuadStore, collection string, d quad.Direction, val graph.V constraint := bson.M{d.String(): name} - size, err := qs.db.C(collection).Find(constraint).Count() - if err != nil { - // FIXME(kortschak) This should be passed back rather than just logging. - glog.Errorln("Trouble getting size for iterator! ", err) - return nil - } - return &Iterator{ uid: iterator.NextUID(), name: name, @@ -61,29 +54,29 @@ func NewIterator(qs *QuadStore, collection string, d quad.Direction, val graph.V collection: collection, qs: qs, dir: d, - iter: qs.db.C(collection).Find(constraint).Iter(), - size: int64(size), + iter: nil, + size: -1, hash: val.(string), isAll: false, } } -func NewAllIterator(qs *QuadStore, collection string) *Iterator { - size, err := qs.db.C(collection).Count() - if err != nil { - // FIXME(kortschak) This should be passed back rather than just logging. - glog.Errorln("Trouble getting size for iterator! ", err) - return nil +func (it *Iterator) makeMongoIterator() *mgo.Iter { + if it.isAll { + return it.qs.db.C(it.collection).Find(nil).Iter() } + return it.qs.db.C(it.collection).Find(it.constraint).Iter() +} +func NewAllIterator(qs *QuadStore, collection string) *Iterator { return &Iterator{ uid: iterator.NextUID(), qs: qs, dir: quad.Any, constraint: nil, collection: collection, - iter: qs.db.C(collection).Find(nil).Iter(), - size: int64(size), + iter: nil, + size: -1, hash: "", isAll: true, } @@ -94,13 +87,16 @@ func (it *Iterator) UID() uint64 { } func (it *Iterator) Reset() { - it.iter.Close() + it.Close() it.iter = it.qs.db.C(it.collection).Find(it.constraint).Iter() } func (it *Iterator) Close() error { - return it.iter.Close() + if it.iter != nil { + return it.iter.Close() + } + return nil } func (it *Iterator) Tagger() *graph.Tagger { @@ -134,6 +130,9 @@ func (it *Iterator) Next() bool { Added []int64 `bson:"Added"` Deleted []int64 `bson:"Deleted"` } + if it.iter == nil { + it.iter = it.makeMongoIterator() + } found := it.iter.Next(&result) if !found { err := it.iter.Err() @@ -197,6 +196,13 @@ func (it *Iterator) Contains(v graph.Value) bool { } func (it *Iterator) Size() (int64, bool) { + if it.size == -1 { + var err error + it.size, err = it.qs.getSize(it.collection, it.constraint) + if err != nil { + it.err = err + } + } return it.size, true } diff --git a/graph/mongo/lru.go b/graph/mongo/lru.go index aba2372..34f9db7 100644 --- a/graph/mongo/lru.go +++ b/graph/mongo/lru.go @@ -29,7 +29,7 @@ type cache struct { type kv struct { key string - value string + value interface{} } func newCache(size int) *cache { @@ -40,7 +40,7 @@ func newCache(size int) *cache { return &lru } -func (lru *cache) Put(key string, value string) { +func (lru *cache) Put(key string, value interface{}) { if _, ok := lru.Get(key); ok { return } @@ -51,12 +51,12 @@ func (lru *cache) Put(key string, value string) { lru.cache[key] = lru.priority.Front() } -func (lru *cache) Get(key string) (string, bool) { +func (lru *cache) Get(key string) (interface{}, bool) { if element, ok := lru.cache[key]; ok { lru.priority.MoveToFront(element) return element.Value.(kv).value, true } - return "", false + return nil, false } func (lru *cache) removeOldest() { diff --git a/graph/mongo/quadstore.go b/graph/mongo/quadstore.go index 2a74be9..a997ecd 100644 --- a/graph/mongo/quadstore.go +++ b/graph/mongo/quadstore.go @@ -48,6 +48,7 @@ type QuadStore struct { session *mgo.Session db *mgo.Database ids *cache + sizes *cache } func createNewMongoGraph(addr string, options graph.Options) error { @@ -106,6 +107,7 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) { qs.db = conn.DB(dbName) qs.session = conn qs.ids = newCache(1 << 16) + qs.sizes = newCache(1 << 16) return &qs, nil } @@ -313,7 +315,7 @@ func (qs *QuadStore) ValueOf(s string) graph.Value { func (qs *QuadStore) NameOf(v graph.Value) string { val, ok := qs.ids.Get(v.(string)) if ok { - return val + return val.(string) } var node MongoNode err := qs.db.C("nodes").FindId(v.(string)).One(&node) @@ -377,3 +379,27 @@ func (qs *QuadStore) QuadDirection(in graph.Value, d quad.Direction) graph.Value func (qs *QuadStore) Type() string { return QuadStoreType } + +func (qs *QuadStore) getSize(collection string, constraint bson.M) (int64, error) { + var size int + bytes, err := bson.Marshal(constraint) + if err != nil { + glog.Errorf("Couldn't marshal internal constraint") + return -1, err + } + key := collection + string(bytes) + if val, ok := qs.sizes.Get(key); ok { + return val.(int64), nil + } + if constraint == nil { + size, err = qs.db.C(collection).Count() + } else { + size, err = qs.db.C(collection).Find(constraint).Count() + } + if err != nil { + glog.Errorln("Trouble getting size for iterator! ", err) + return -1, err + } + qs.sizes.Put(key, int64(size)) + return int64(size), nil +} diff --git a/graph/mongo/quadstore_iterator_optimize.go b/graph/mongo/quadstore_iterator_optimize.go index 1fdd936..c85ba99 100644 --- a/graph/mongo/quadstore_iterator_optimize.go +++ b/graph/mongo/quadstore_iterator_optimize.go @@ -15,6 +15,8 @@ package mongo import ( + "github.com/barakmich/glog" + "github.com/google/cayley/graph" "github.com/google/cayley/graph/iterator" ) @@ -23,11 +25,74 @@ func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) switch it.Type() { case graph.LinksTo: return qs.optimizeLinksTo(it.(*iterator.LinksTo)) + case graph.And: + return qs.optimizeAndIterator(it.(*iterator.And)) } return it, false } +func (qs *QuadStore) optimizeAndIterator(it *iterator.And) (graph.Iterator, bool) { + // Fail fast if nothing can happen + glog.V(4).Infoln("Entering optimizeAndIterator", it.UID()) + found := false + for _, it := range it.SubIterators() { + glog.V(4).Infoln(it.Type()) + if it.Type() == mongoType { + found = true + } + } + if !found { + glog.V(4).Infoln("Aborting optimizeAndIterator") + return it, false + } + + newAnd := iterator.NewAnd(qs) + var mongoIt *Iterator + for _, it := range it.SubIterators() { + switch it.Type() { + case mongoType: + if mongoIt == nil { + mongoIt = it.(*Iterator) + } else { + newAnd.AddSubIterator(it) + } + case graph.LinksTo: + continue + default: + newAnd.AddSubIterator(it) + } + } + stats := mongoIt.Stats() + + lset := []graph.Linkage{ + { + Dir: mongoIt.dir, + Values: []graph.Value{qs.ValueOf(mongoIt.name)}, + }, + } + + n := 0 + for _, it := range it.SubIterators() { + if it.Type() == graph.LinksTo { + lto := it.(*iterator.LinksTo) + // Is it more effective to do the replacement, or let the mongo check the linksto? + ltostats := lto.Stats() + if (ltostats.ContainsCost+stats.NextCost)*stats.Size > (ltostats.NextCost+stats.ContainsCost)*ltostats.Size { + continue + } + newLto := NewLinksTo(qs, lto.SubIterators()[0], "quads", lto.Direction(), lset) + newAnd.AddSubIterator(newLto) + n++ + } + } + if n == 0 { + return it, false + } + + return newAnd.Optimize() +} + func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { subs := it.SubIterators() if len(subs) != 1 { diff --git a/graph/result_tree_evaluator_test.go b/graph/result_tree_evaluator_test.go index af4c07a..ee733f9 100644 --- a/graph/result_tree_evaluator_test.go +++ b/graph/result_tree_evaluator_test.go @@ -33,7 +33,7 @@ func TestSingleIterator(t *testing.T) { func TestAndIterator(t *testing.T) { all1 := iterator.NewInt64(1, 3) all2 := iterator.NewInt64(3, 5) - and := iterator.NewAnd() + and := iterator.NewAnd(nil) and.AddSubIterator(all1) and.AddSubIterator(all2) diff --git a/query/gremlin/build_iterator.go b/query/gremlin/build_iterator.go index f92d473..e49c2d4 100644 --- a/query/gremlin/build_iterator.go +++ b/query/gremlin/build_iterator.go @@ -133,7 +133,7 @@ func buildInOutIterator(obj *otto.Object, qs graph.QuadStore, base graph.Iterato in, out = out, in } lto := iterator.NewLinksTo(qs, base, in) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(iterator.NewLinksTo(qs, predicateNodeIterator, quad.Predicate)) and.AddSubIterator(lto) return iterator.NewHasA(qs, and, out) @@ -182,11 +182,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It } predFixed := qs.FixedIterator() predFixed.Add(qs.ValueOf(stringArgs[0])) - subAnd := iterator.NewAnd() + subAnd := iterator.NewAnd(qs) subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate)) subAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object)) hasa := iterator.NewHasA(qs, subAnd, quad.Subject) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and @@ -202,11 +202,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It } predFixed := qs.FixedIterator() predFixed.Add(qs.ValueOf(stringArgs[0])) - subAnd := iterator.NewAnd() + subAnd := iterator.NewAnd(qs) subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate)) subAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Subject)) hasa := iterator.NewHasA(qs, subAnd, quad.Object) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and @@ -220,11 +220,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It } predFixed := qs.FixedIterator() predFixed.Add(qs.ValueOf(stringArgs[0])) - subAnd := iterator.NewAnd() + subAnd := iterator.NewAnd(qs) subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate)) subAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed, quad.Object)) hasa := iterator.NewHasA(qs, subAnd, quad.Subject) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and @@ -238,14 +238,14 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It } argIt := buildIteratorTree(firstArg.Object(), qs) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(subIt) and.AddSubIterator(argIt) it = and case "back": arg, _ := obj.Get("_gremlin_back_chain") argIt := buildIteratorTree(arg.Object(), qs) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(subIt) and.AddSubIterator(argIt) it = and @@ -254,7 +254,7 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It for _, name := range stringArgs { fixed.Add(qs.ValueOf(name)) } - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(fixed) and.AddSubIterator(subIt) it = and @@ -311,7 +311,7 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It toComplementIt := buildIteratorTree(firstArg.Object(), qs) notIt := iterator.NewNot(toComplementIt, allIt) - and := iterator.NewAnd() + and := iterator.NewAnd(qs) and.AddSubIterator(subIt) and.AddSubIterator(notIt) it = and diff --git a/query/mql/build_iterator.go b/query/mql/build_iterator.go index 8d81717..57ab8af 100644 --- a/query/mql/build_iterator.go +++ b/query/mql/build_iterator.go @@ -102,7 +102,7 @@ func (q *Query) buildIteratorTreeInternal(query interface{}, path Path) (it grap } func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path Path) (graph.Iterator, error) { - it := iterator.NewAnd() + it := iterator.NewAnd(q.ses.qs) it.AddSubIterator(q.ses.qs.NodesAllIterator()) var err error err = nil @@ -136,7 +136,7 @@ func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path if err != nil { return nil, err } - subAnd := iterator.NewAnd() + subAnd := iterator.NewAnd(q.ses.qs) predFixed := q.ses.qs.FixedIterator() predFixed.Add(q.ses.qs.ValueOf(pred)) subAnd.AddSubIterator(iterator.NewLinksTo(q.ses.qs, predFixed, quad.Predicate)) diff --git a/query/sexp/parser.go b/query/sexp/parser.go index 71e80d7..6f3adbc 100644 --- a/query/sexp/parser.go +++ b/query/sexp/parser.go @@ -213,7 +213,7 @@ func buildIteratorTree(tree *peg.ExpressionTree, qs graph.QuadStore) graph.Itera return lto case "RootConstraint": constraintCount := 0 - and := iterator.NewAnd() + and := iterator.NewAnd(qs) for _, c := range tree.Children { switch c.Name { case "NodeIdentifier": @@ -232,7 +232,7 @@ func buildIteratorTree(tree *peg.ExpressionTree, qs graph.QuadStore) graph.Itera var hasa *iterator.HasA topLevelDir := quad.Subject subItDir := quad.Object - subAnd := iterator.NewAnd() + subAnd := iterator.NewAnd(qs) isOptional := false for _, c := range tree.Children { switch c.Name {