Merge pull request #236 from barakmich/mongo_indexed_lto
Mongo Indexed LinksTo
This commit is contained in:
commit
0570c71601
19 changed files with 512 additions and 69 deletions
|
|
@ -334,7 +334,7 @@ func TestSetIterator(t *testing.T) {
|
|||
}
|
||||
it.Reset()
|
||||
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadsAllIterator())
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -354,7 +354,7 @@ func TestSetIterator(t *testing.T) {
|
|||
t.Errorf("Failed to get expected results, got:%v expect:%v", got, expect)
|
||||
}
|
||||
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ func TestSetIterator(t *testing.T) {
|
|||
it.Reset()
|
||||
|
||||
// Order is important
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -406,7 +406,7 @@ func TestSetIterator(t *testing.T) {
|
|||
it.Reset()
|
||||
|
||||
// Order is important
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(it)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
|
||||
|
|
|
|||
|
|
@ -282,12 +282,12 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) {
|
|||
|
||||
all := qs.NodesAllIterator()
|
||||
|
||||
innerAnd := iterator.NewAnd()
|
||||
innerAnd := iterator.NewAnd(qs)
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate))
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object))
|
||||
|
||||
hasa := iterator.NewHasA(qs, innerAnd, quad.Subject)
|
||||
outerAnd := iterator.NewAnd()
|
||||
outerAnd := iterator.NewAnd(qs)
|
||||
outerAnd.AddSubIterator(fixed)
|
||||
outerAnd.AddSubIterator(hasa)
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,18 @@ type Tagger struct {
|
|||
fixedTags map[string]Value
|
||||
}
|
||||
|
||||
// TODO(barakmich): Linkage is general enough that there are places we take
|
||||
//the combined arguments `quad.Direction, graph.Value` that it may be worth
|
||||
//converting these into Linkages. If nothing else, future indexed iterators may
|
||||
//benefit from the shared representation
|
||||
|
||||
// Linkage is a union type representing a set of values established for a given
|
||||
// quad direction.
|
||||
type Linkage struct {
|
||||
Dir quad.Direction
|
||||
Values []Value
|
||||
}
|
||||
|
||||
// Add a tag to the iterator.
|
||||
func (t *Tagger) Add(tag string) {
|
||||
t.tags = append(t.tags, tag)
|
||||
|
|
|
|||
|
|
@ -31,13 +31,16 @@ type And struct {
|
|||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
err error
|
||||
qs graph.QuadStore
|
||||
}
|
||||
|
||||
// Creates a new And iterator.
|
||||
func NewAnd() *And {
|
||||
// NewAnd creates an And iterator. `qs` is only required when needing a handle
|
||||
// for QuadStore-specific optimizations, otherwise nil is acceptable.
|
||||
func NewAnd(qs graph.QuadStore) *And {
|
||||
return &And{
|
||||
uid: NextUID(),
|
||||
internalIterators: make([]graph.Iterator, 0, 20),
|
||||
qs: qs,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +82,7 @@ func (it *And) TagResults(dst map[string]graph.Value) {
|
|||
}
|
||||
|
||||
func (it *And) Clone() graph.Iterator {
|
||||
and := NewAnd()
|
||||
and := NewAnd(it.qs)
|
||||
and.AddSubIterator(it.primaryIt.Clone())
|
||||
and.tags.CopyFrom(it)
|
||||
for _, sub := range it.internalIterators {
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
|
|||
|
||||
// The easiest thing to do at this point is merely to create a new And iterator
|
||||
// and replace ourselves with our (reordered, optimized) clone.
|
||||
newAnd := NewAnd()
|
||||
newAnd := NewAnd(it.qs)
|
||||
|
||||
// Add the subiterators in order.
|
||||
for _, sub := range its {
|
||||
|
|
@ -95,6 +95,18 @@ func (it *And) Optimize() (graph.Iterator, bool) {
|
|||
// the new And (they were unchanged upon calling Optimize() on them, at the
|
||||
// start).
|
||||
it.cleanUp()
|
||||
|
||||
// Ask the graph.QuadStore if we can be replaced. Often times, this is a great
|
||||
// optimization opportunity (there's a fixed iterator underneath us, for
|
||||
// example).
|
||||
if it.qs != nil {
|
||||
newReplacement, hasOne := it.qs.OptimizeIterator(newAnd)
|
||||
if hasOne {
|
||||
newAnd.Close()
|
||||
return newReplacement, true
|
||||
}
|
||||
}
|
||||
|
||||
return newAnd, true
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,10 +26,14 @@ import (
|
|||
)
|
||||
|
||||
func TestIteratorPromotion(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
all := NewInt64(1, 3)
|
||||
fixed := NewFixed(Identity)
|
||||
fixed.Add(3)
|
||||
a := NewAnd()
|
||||
a := NewAnd(qs)
|
||||
a.AddSubIterator(all)
|
||||
a.AddSubIterator(fixed)
|
||||
all.Tagger().Add("a")
|
||||
|
|
@ -51,9 +55,13 @@ func TestIteratorPromotion(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestNullIteratorAnd(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
all := NewInt64(1, 3)
|
||||
null := NewNull()
|
||||
a := NewAnd()
|
||||
a := NewAnd(qs)
|
||||
a.AddSubIterator(all)
|
||||
a.AddSubIterator(null)
|
||||
newIt, changed := a.Optimize()
|
||||
|
|
@ -66,11 +74,15 @@ func TestNullIteratorAnd(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestReorderWithTag(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
all := NewInt64(100, 300)
|
||||
all.Tagger().Add("good")
|
||||
all2 := NewInt64(1, 30000)
|
||||
all2.Tagger().Add("slow")
|
||||
a := NewAnd()
|
||||
a := NewAnd(qs)
|
||||
// Make all2 the default iterator
|
||||
a.AddSubIterator(all2)
|
||||
a.AddSubIterator(all)
|
||||
|
|
@ -92,11 +104,15 @@ func TestReorderWithTag(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAndStatistics(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
all := NewInt64(100, 300)
|
||||
all.Tagger().Add("good")
|
||||
all2 := NewInt64(1, 30000)
|
||||
all2.Tagger().Add("slow")
|
||||
a := NewAnd()
|
||||
a := NewAnd(qs)
|
||||
// Make all2 the default iterator
|
||||
a.AddSubIterator(all2)
|
||||
a.AddSubIterator(all)
|
||||
|
|
|
|||
|
|
@ -23,10 +23,14 @@ import (
|
|||
|
||||
// Make sure that tags work on the And.
|
||||
func TestTag(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
fix1 := NewFixed(Identity)
|
||||
fix1.Add(234)
|
||||
fix1.Tagger().Add("foo")
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(fix1)
|
||||
and.Tagger().Add("bar")
|
||||
out := fix1.Tagger().Tags()
|
||||
|
|
@ -56,6 +60,10 @@ func TestTag(t *testing.T) {
|
|||
|
||||
// Do a simple itersection of fixed values.
|
||||
func TestAndAndFixedIterators(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
fix1 := NewFixed(Identity)
|
||||
fix1.Add(1)
|
||||
fix1.Add(2)
|
||||
|
|
@ -65,7 +73,7 @@ func TestAndAndFixedIterators(t *testing.T) {
|
|||
fix2.Add(3)
|
||||
fix2.Add(4)
|
||||
fix2.Add(5)
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(fix1)
|
||||
and.AddSubIterator(fix2)
|
||||
// Should be as big as smallest subiterator
|
||||
|
|
@ -94,6 +102,10 @@ func TestAndAndFixedIterators(t *testing.T) {
|
|||
// If there's no intersection, the size should still report the same,
|
||||
// but there should be nothing to Next()
|
||||
func TestNonOverlappingFixedIterators(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
fix1 := NewFixed(Identity)
|
||||
fix1.Add(1)
|
||||
fix1.Add(2)
|
||||
|
|
@ -103,7 +115,7 @@ func TestNonOverlappingFixedIterators(t *testing.T) {
|
|||
fix2.Add(5)
|
||||
fix2.Add(6)
|
||||
fix2.Add(7)
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(fix1)
|
||||
and.AddSubIterator(fix2)
|
||||
// Should be as big as smallest subiterator
|
||||
|
|
@ -122,9 +134,13 @@ func TestNonOverlappingFixedIterators(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAllIterators(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
all1 := NewInt64(1, 5)
|
||||
all2 := NewInt64(4, 10)
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(all2)
|
||||
and.AddSubIterator(all1)
|
||||
|
||||
|
|
@ -142,10 +158,14 @@ func TestAllIterators(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAndIteratorErr(t *testing.T) {
|
||||
qs := &store{
|
||||
data: []string{},
|
||||
iter: NewFixed(Identity),
|
||||
}
|
||||
wantErr := errors.New("unique")
|
||||
allErr := newTestIterator(false, wantErr)
|
||||
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(allErr)
|
||||
and.AddSubIterator(NewInt64(1, 5))
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ import (
|
|||
)
|
||||
|
||||
func hasaWithTag(qs graph.QuadStore, tag string, target string) *HasA {
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
|
||||
obj := qs.FixedIterator()
|
||||
obj.Add(qs.ValueOf(target))
|
||||
|
|
@ -91,7 +91,7 @@ func TestQueryShape(t *testing.T) {
|
|||
}
|
||||
|
||||
// Given a name-of-an-and-iterator's shape.
|
||||
andInternal := NewAnd()
|
||||
andInternal := NewAnd(qs)
|
||||
|
||||
hasa1 := hasaWithTag(qs, "tag1", "cool")
|
||||
hasa1.Tagger().Add("hasa1")
|
||||
|
|
@ -104,7 +104,7 @@ func TestQueryShape(t *testing.T) {
|
|||
pred := qs.FixedIterator()
|
||||
pred.Add(qs.ValueOf("name"))
|
||||
|
||||
and := NewAnd()
|
||||
and := NewAnd(qs)
|
||||
and.AddSubIterator(NewLinksTo(qs, andInternal, quad.Subject))
|
||||
and.AddSubIterator(NewLinksTo(qs, pred, quad.Predicate))
|
||||
|
||||
|
|
|
|||
|
|
@ -333,7 +333,7 @@ func TestSetIterator(t *testing.T) {
|
|||
}
|
||||
it.Reset()
|
||||
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadsAllIterator())
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -353,7 +353,7 @@ func TestSetIterator(t *testing.T) {
|
|||
t.Errorf("Failed to get expected results, got:%v expect:%v", got, expect)
|
||||
}
|
||||
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -392,7 +392,7 @@ func TestSetIterator(t *testing.T) {
|
|||
it.Reset()
|
||||
|
||||
// Order is important
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
and.AddSubIterator(it)
|
||||
|
||||
|
|
@ -405,7 +405,7 @@ func TestSetIterator(t *testing.T) {
|
|||
it.Reset()
|
||||
|
||||
// Order is important
|
||||
and = iterator.NewAnd()
|
||||
and = iterator.NewAnd(qs)
|
||||
and.AddSubIterator(it)
|
||||
and.AddSubIterator(qs.QuadIterator(quad.Subject, qs.ValueOf("B")))
|
||||
|
||||
|
|
|
|||
|
|
@ -108,12 +108,12 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) {
|
|||
|
||||
all := qs.NodesAllIterator()
|
||||
|
||||
innerAnd := iterator.NewAnd()
|
||||
innerAnd := iterator.NewAnd(qs)
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate))
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object))
|
||||
|
||||
hasa := iterator.NewHasA(qs, innerAnd, quad.Subject)
|
||||
outerAnd := iterator.NewAnd()
|
||||
outerAnd := iterator.NewAnd(qs)
|
||||
outerAnd.AddSubIterator(fixed)
|
||||
outerAnd.AddSubIterator(hasa)
|
||||
|
||||
|
|
@ -193,7 +193,7 @@ func TestRemoveQuad(t *testing.T) {
|
|||
fixed2 := qs.FixedIterator()
|
||||
fixed2.Add(qs.ValueOf("follows"))
|
||||
|
||||
innerAnd := iterator.NewAnd()
|
||||
innerAnd := iterator.NewAnd(qs)
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed, quad.Subject))
|
||||
innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate))
|
||||
|
||||
|
|
|
|||
283
graph/mongo/indexed_linksto.go
Normal file
283
graph/mongo/indexed_linksto.go
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mongo
|
||||
|
||||
import (
|
||||
"gopkg.in/mgo.v2"
|
||||
"gopkg.in/mgo.v2/bson"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/graph/iterator"
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
var _ graph.Nexter = &LinksTo{}
|
||||
|
||||
var linksToType graph.Type
|
||||
|
||||
func init() {
|
||||
linksToType = graph.RegisterIterator("mongo-linksto")
|
||||
}
|
||||
|
||||
// LinksTo is a MongoDB-dependent version of a LinksTo iterator. Like the normal
|
||||
// LinksTo, it represents a set of links to a set of nodes, represented by its
|
||||
// subiterator. However, this iterator may often be faster than the generic
|
||||
// LinksTo, as it can use the secondary indices in Mongo as features within the
|
||||
// Mongo query, reducing the size of the result set and speeding up iteration.
|
||||
type LinksTo struct {
|
||||
uid uint64
|
||||
collection string
|
||||
tags graph.Tagger
|
||||
qs *QuadStore
|
||||
primaryIt graph.Iterator
|
||||
dir quad.Direction
|
||||
nextIt *mgo.Iter
|
||||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
lset []graph.Linkage
|
||||
err error
|
||||
}
|
||||
|
||||
// NewLinksTo constructs a new indexed LinksTo iterator for Mongo around a direction
|
||||
// and a subiterator of nodes.
|
||||
func NewLinksTo(qs *QuadStore, it graph.Iterator, collection string, d quad.Direction, lset []graph.Linkage) *LinksTo {
|
||||
return &LinksTo{
|
||||
uid: iterator.NextUID(),
|
||||
qs: qs,
|
||||
primaryIt: it,
|
||||
dir: d,
|
||||
nextIt: nil,
|
||||
lset: lset,
|
||||
collection: collection,
|
||||
}
|
||||
}
|
||||
|
||||
func (it *LinksTo) buildConstraint() bson.M {
|
||||
constraint := bson.M{}
|
||||
for _, set := range it.lset {
|
||||
var s []string
|
||||
for _, v := range set.Values {
|
||||
s = append(s, it.qs.NameOf(v))
|
||||
}
|
||||
constraint[set.Dir.String()] = bson.M{"$in": s}
|
||||
if len(s) == 1 {
|
||||
constraint[set.Dir.String()] = s[0]
|
||||
}
|
||||
}
|
||||
return constraint
|
||||
}
|
||||
|
||||
func (it *LinksTo) buildIteratorFor(d quad.Direction, val graph.Value) *mgo.Iter {
|
||||
name := it.qs.NameOf(val)
|
||||
constraint := it.buildConstraint()
|
||||
constraint[d.String()] = name
|
||||
return it.qs.db.C(it.collection).Find(constraint).Iter()
|
||||
}
|
||||
|
||||
func (it *LinksTo) UID() uint64 {
|
||||
return it.uid
|
||||
}
|
||||
|
||||
func (it *LinksTo) Tagger() *graph.Tagger {
|
||||
return &it.tags
|
||||
}
|
||||
|
||||
// Return the direction under consideration.
|
||||
func (it *LinksTo) Direction() quad.Direction { return it.dir }
|
||||
|
||||
// Tag these results, and our subiterator's results.
|
||||
func (it *LinksTo) TagResults(dst map[string]graph.Value) {
|
||||
for _, tag := range it.tags.Tags() {
|
||||
dst[tag] = it.Result()
|
||||
}
|
||||
|
||||
for tag, value := range it.tags.Fixed() {
|
||||
dst[tag] = value
|
||||
}
|
||||
|
||||
it.primaryIt.TagResults(dst)
|
||||
}
|
||||
|
||||
// DEPRECATED
|
||||
func (it *LinksTo) ResultTree() *graph.ResultTree {
|
||||
tree := graph.NewResultTree(it.Result())
|
||||
tree.AddSubtree(it.primaryIt.ResultTree())
|
||||
return tree
|
||||
}
|
||||
|
||||
// Optimize the LinksTo, by replacing it if it can be.
|
||||
func (it *LinksTo) Optimize() (graph.Iterator, bool) {
|
||||
return it, false
|
||||
}
|
||||
|
||||
func (it *LinksTo) Next() bool {
|
||||
var result struct {
|
||||
ID string `bson:"_id"`
|
||||
Added []int64 `bson:"Added"`
|
||||
Deleted []int64 `bson:"Deleted"`
|
||||
}
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
if it.nextIt != nil && it.nextIt.Next(&result) {
|
||||
it.runstats.ContainsNext += 1
|
||||
if it.collection == "quads" && len(result.Added) <= len(result.Deleted) {
|
||||
return it.Next()
|
||||
}
|
||||
it.result = result.ID
|
||||
return graph.NextLogOut(it, it.result, true)
|
||||
}
|
||||
|
||||
if it.nextIt != nil {
|
||||
// If there's an error in the 'next' iterator, we save it and we're done.
|
||||
it.err = it.nextIt.Err()
|
||||
if it.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
}
|
||||
// Subiterator is empty, get another one
|
||||
if !graph.Next(it.primaryIt) {
|
||||
// Possibly save error
|
||||
it.err = it.primaryIt.Err()
|
||||
|
||||
// We're out of nodes in our subiterator, so we're done as well.
|
||||
return graph.NextLogOut(it, 0, false)
|
||||
}
|
||||
if it.nextIt != nil {
|
||||
it.nextIt.Close()
|
||||
}
|
||||
it.nextIt = it.buildIteratorFor(it.dir, it.primaryIt.Result())
|
||||
|
||||
// Recurse -- return the first in the next set.
|
||||
return it.Next()
|
||||
}
|
||||
|
||||
func (it *LinksTo) Err() error {
|
||||
return it.err
|
||||
}
|
||||
|
||||
func (it *LinksTo) Result() graph.Value {
|
||||
return it.result
|
||||
}
|
||||
|
||||
func (it *LinksTo) Close() error {
|
||||
var err error
|
||||
if it.nextIt != nil {
|
||||
err = it.nextIt.Close()
|
||||
}
|
||||
|
||||
_err := it.primaryIt.Close()
|
||||
if _err != nil && err == nil {
|
||||
err = _err
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (it *LinksTo) NextPath() bool {
|
||||
ok := it.primaryIt.NextPath()
|
||||
if !ok {
|
||||
it.err = it.primaryIt.Err()
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (it *LinksTo) Type() graph.Type {
|
||||
return linksToType
|
||||
}
|
||||
|
||||
func (it *LinksTo) Clone() graph.Iterator {
|
||||
m := NewLinksTo(it.qs, it.primaryIt.Clone(), it.collection, it.dir, it.lset)
|
||||
m.tags.CopyFrom(it)
|
||||
return m
|
||||
}
|
||||
|
||||
func (it *LinksTo) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
it.runstats.Contains += 1
|
||||
|
||||
for _, set := range it.lset {
|
||||
dval := it.qs.QuadDirection(val, set.Dir)
|
||||
good := false
|
||||
for _, val := range set.Values {
|
||||
if val == dval {
|
||||
good = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !good {
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
}
|
||||
|
||||
node := it.qs.QuadDirection(val, it.dir)
|
||||
if it.primaryIt.Contains(node) {
|
||||
it.result = val
|
||||
return graph.ContainsLogOut(it, val, true)
|
||||
}
|
||||
it.err = it.primaryIt.Err()
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
|
||||
func (it *LinksTo) Describe() graph.Description {
|
||||
primary := it.primaryIt.Describe()
|
||||
return graph.Description{
|
||||
UID: it.UID(),
|
||||
Type: it.Type(),
|
||||
Direction: it.dir,
|
||||
Iterator: &primary,
|
||||
}
|
||||
}
|
||||
|
||||
func (it *LinksTo) Reset() {
|
||||
it.primaryIt.Reset()
|
||||
if it.nextIt != nil {
|
||||
it.nextIt.Close()
|
||||
}
|
||||
it.nextIt = nil
|
||||
}
|
||||
|
||||
// Return a guess as to how big or costly it is to next the iterator.
|
||||
func (it *LinksTo) Stats() graph.IteratorStats {
|
||||
subitStats := it.primaryIt.Stats()
|
||||
// TODO(barakmich): These should really come from the quadstore itself
|
||||
fanoutFactor := int64(20)
|
||||
checkConstant := int64(1)
|
||||
nextConstant := int64(2)
|
||||
|
||||
size := fanoutFactor * subitStats.Size
|
||||
csize, _ := it.qs.getSize(it.collection, it.buildConstraint())
|
||||
if size > csize {
|
||||
size = csize
|
||||
}
|
||||
|
||||
return graph.IteratorStats{
|
||||
NextCost: nextConstant + subitStats.NextCost,
|
||||
ContainsCost: checkConstant + subitStats.ContainsCost,
|
||||
Size: size,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
ContainsNext: it.runstats.ContainsNext,
|
||||
}
|
||||
}
|
||||
|
||||
func (it *LinksTo) Size() (int64, bool) {
|
||||
return it.Stats().Size, false
|
||||
}
|
||||
|
||||
// Return a list containing only our subiterator.
|
||||
func (it *LinksTo) SubIterators() []graph.Iterator {
|
||||
return []graph.Iterator{it.primaryIt}
|
||||
}
|
||||
|
|
@ -47,13 +47,6 @@ func NewIterator(qs *QuadStore, collection string, d quad.Direction, val graph.V
|
|||
|
||||
constraint := bson.M{d.String(): name}
|
||||
|
||||
size, err := qs.db.C(collection).Find(constraint).Count()
|
||||
if err != nil {
|
||||
// FIXME(kortschak) This should be passed back rather than just logging.
|
||||
glog.Errorln("Trouble getting size for iterator! ", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
return &Iterator{
|
||||
uid: iterator.NextUID(),
|
||||
name: name,
|
||||
|
|
@ -61,29 +54,29 @@ func NewIterator(qs *QuadStore, collection string, d quad.Direction, val graph.V
|
|||
collection: collection,
|
||||
qs: qs,
|
||||
dir: d,
|
||||
iter: qs.db.C(collection).Find(constraint).Iter(),
|
||||
size: int64(size),
|
||||
iter: nil,
|
||||
size: -1,
|
||||
hash: val.(string),
|
||||
isAll: false,
|
||||
}
|
||||
}
|
||||
|
||||
func NewAllIterator(qs *QuadStore, collection string) *Iterator {
|
||||
size, err := qs.db.C(collection).Count()
|
||||
if err != nil {
|
||||
// FIXME(kortschak) This should be passed back rather than just logging.
|
||||
glog.Errorln("Trouble getting size for iterator! ", err)
|
||||
return nil
|
||||
func (it *Iterator) makeMongoIterator() *mgo.Iter {
|
||||
if it.isAll {
|
||||
return it.qs.db.C(it.collection).Find(nil).Iter()
|
||||
}
|
||||
return it.qs.db.C(it.collection).Find(it.constraint).Iter()
|
||||
}
|
||||
|
||||
func NewAllIterator(qs *QuadStore, collection string) *Iterator {
|
||||
return &Iterator{
|
||||
uid: iterator.NextUID(),
|
||||
qs: qs,
|
||||
dir: quad.Any,
|
||||
constraint: nil,
|
||||
collection: collection,
|
||||
iter: qs.db.C(collection).Find(nil).Iter(),
|
||||
size: int64(size),
|
||||
iter: nil,
|
||||
size: -1,
|
||||
hash: "",
|
||||
isAll: true,
|
||||
}
|
||||
|
|
@ -94,13 +87,16 @@ func (it *Iterator) UID() uint64 {
|
|||
}
|
||||
|
||||
func (it *Iterator) Reset() {
|
||||
it.iter.Close()
|
||||
it.Close()
|
||||
it.iter = it.qs.db.C(it.collection).Find(it.constraint).Iter()
|
||||
|
||||
}
|
||||
|
||||
func (it *Iterator) Close() error {
|
||||
return it.iter.Close()
|
||||
if it.iter != nil {
|
||||
return it.iter.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (it *Iterator) Tagger() *graph.Tagger {
|
||||
|
|
@ -134,6 +130,9 @@ func (it *Iterator) Next() bool {
|
|||
Added []int64 `bson:"Added"`
|
||||
Deleted []int64 `bson:"Deleted"`
|
||||
}
|
||||
if it.iter == nil {
|
||||
it.iter = it.makeMongoIterator()
|
||||
}
|
||||
found := it.iter.Next(&result)
|
||||
if !found {
|
||||
err := it.iter.Err()
|
||||
|
|
@ -197,6 +196,13 @@ func (it *Iterator) Contains(v graph.Value) bool {
|
|||
}
|
||||
|
||||
func (it *Iterator) Size() (int64, bool) {
|
||||
if it.size == -1 {
|
||||
var err error
|
||||
it.size, err = it.qs.getSize(it.collection, it.constraint)
|
||||
if err != nil {
|
||||
it.err = err
|
||||
}
|
||||
}
|
||||
return it.size, true
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ type cache struct {
|
|||
|
||||
type kv struct {
|
||||
key string
|
||||
value string
|
||||
value interface{}
|
||||
}
|
||||
|
||||
func newCache(size int) *cache {
|
||||
|
|
@ -40,7 +40,7 @@ func newCache(size int) *cache {
|
|||
return &lru
|
||||
}
|
||||
|
||||
func (lru *cache) Put(key string, value string) {
|
||||
func (lru *cache) Put(key string, value interface{}) {
|
||||
if _, ok := lru.Get(key); ok {
|
||||
return
|
||||
}
|
||||
|
|
@ -51,12 +51,12 @@ func (lru *cache) Put(key string, value string) {
|
|||
lru.cache[key] = lru.priority.Front()
|
||||
}
|
||||
|
||||
func (lru *cache) Get(key string) (string, bool) {
|
||||
func (lru *cache) Get(key string) (interface{}, bool) {
|
||||
if element, ok := lru.cache[key]; ok {
|
||||
lru.priority.MoveToFront(element)
|
||||
return element.Value.(kv).value, true
|
||||
}
|
||||
return "", false
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (lru *cache) removeOldest() {
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ type QuadStore struct {
|
|||
session *mgo.Session
|
||||
db *mgo.Database
|
||||
ids *cache
|
||||
sizes *cache
|
||||
}
|
||||
|
||||
func createNewMongoGraph(addr string, options graph.Options) error {
|
||||
|
|
@ -106,6 +107,7 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) {
|
|||
qs.db = conn.DB(dbName)
|
||||
qs.session = conn
|
||||
qs.ids = newCache(1 << 16)
|
||||
qs.sizes = newCache(1 << 16)
|
||||
return &qs, nil
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +315,7 @@ func (qs *QuadStore) ValueOf(s string) graph.Value {
|
|||
func (qs *QuadStore) NameOf(v graph.Value) string {
|
||||
val, ok := qs.ids.Get(v.(string))
|
||||
if ok {
|
||||
return val
|
||||
return val.(string)
|
||||
}
|
||||
var node MongoNode
|
||||
err := qs.db.C("nodes").FindId(v.(string)).One(&node)
|
||||
|
|
@ -377,3 +379,27 @@ func (qs *QuadStore) QuadDirection(in graph.Value, d quad.Direction) graph.Value
|
|||
func (qs *QuadStore) Type() string {
|
||||
return QuadStoreType
|
||||
}
|
||||
|
||||
func (qs *QuadStore) getSize(collection string, constraint bson.M) (int64, error) {
|
||||
var size int
|
||||
bytes, err := bson.Marshal(constraint)
|
||||
if err != nil {
|
||||
glog.Errorf("Couldn't marshal internal constraint")
|
||||
return -1, err
|
||||
}
|
||||
key := collection + string(bytes)
|
||||
if val, ok := qs.sizes.Get(key); ok {
|
||||
return val.(int64), nil
|
||||
}
|
||||
if constraint == nil {
|
||||
size, err = qs.db.C(collection).Count()
|
||||
} else {
|
||||
size, err = qs.db.C(collection).Find(constraint).Count()
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorln("Trouble getting size for iterator! ", err)
|
||||
return -1, err
|
||||
}
|
||||
qs.sizes.Put(key, int64(size))
|
||||
return int64(size), nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@
|
|||
package mongo
|
||||
|
||||
import (
|
||||
"github.com/barakmich/glog"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/graph/iterator"
|
||||
)
|
||||
|
|
@ -23,11 +25,74 @@ func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool)
|
|||
switch it.Type() {
|
||||
case graph.LinksTo:
|
||||
return qs.optimizeLinksTo(it.(*iterator.LinksTo))
|
||||
case graph.And:
|
||||
return qs.optimizeAndIterator(it.(*iterator.And))
|
||||
|
||||
}
|
||||
return it, false
|
||||
}
|
||||
|
||||
func (qs *QuadStore) optimizeAndIterator(it *iterator.And) (graph.Iterator, bool) {
|
||||
// Fail fast if nothing can happen
|
||||
glog.V(4).Infoln("Entering optimizeAndIterator", it.UID())
|
||||
found := false
|
||||
for _, it := range it.SubIterators() {
|
||||
glog.V(4).Infoln(it.Type())
|
||||
if it.Type() == mongoType {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
glog.V(4).Infoln("Aborting optimizeAndIterator")
|
||||
return it, false
|
||||
}
|
||||
|
||||
newAnd := iterator.NewAnd(qs)
|
||||
var mongoIt *Iterator
|
||||
for _, it := range it.SubIterators() {
|
||||
switch it.Type() {
|
||||
case mongoType:
|
||||
if mongoIt == nil {
|
||||
mongoIt = it.(*Iterator)
|
||||
} else {
|
||||
newAnd.AddSubIterator(it)
|
||||
}
|
||||
case graph.LinksTo:
|
||||
continue
|
||||
default:
|
||||
newAnd.AddSubIterator(it)
|
||||
}
|
||||
}
|
||||
stats := mongoIt.Stats()
|
||||
|
||||
lset := []graph.Linkage{
|
||||
{
|
||||
Dir: mongoIt.dir,
|
||||
Values: []graph.Value{qs.ValueOf(mongoIt.name)},
|
||||
},
|
||||
}
|
||||
|
||||
n := 0
|
||||
for _, it := range it.SubIterators() {
|
||||
if it.Type() == graph.LinksTo {
|
||||
lto := it.(*iterator.LinksTo)
|
||||
// Is it more effective to do the replacement, or let the mongo check the linksto?
|
||||
ltostats := lto.Stats()
|
||||
if (ltostats.ContainsCost+stats.NextCost)*stats.Size > (ltostats.NextCost+stats.ContainsCost)*ltostats.Size {
|
||||
continue
|
||||
}
|
||||
newLto := NewLinksTo(qs, lto.SubIterators()[0], "quads", lto.Direction(), lset)
|
||||
newAnd.AddSubIterator(newLto)
|
||||
n++
|
||||
}
|
||||
}
|
||||
if n == 0 {
|
||||
return it, false
|
||||
}
|
||||
|
||||
return newAnd.Optimize()
|
||||
}
|
||||
|
||||
func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) {
|
||||
subs := it.SubIterators()
|
||||
if len(subs) != 1 {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ func TestSingleIterator(t *testing.T) {
|
|||
func TestAndIterator(t *testing.T) {
|
||||
all1 := iterator.NewInt64(1, 3)
|
||||
all2 := iterator.NewInt64(3, 5)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(nil)
|
||||
and.AddSubIterator(all1)
|
||||
and.AddSubIterator(all2)
|
||||
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ func buildInOutIterator(obj *otto.Object, qs graph.QuadStore, base graph.Iterato
|
|||
in, out = out, in
|
||||
}
|
||||
lto := iterator.NewLinksTo(qs, base, in)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(iterator.NewLinksTo(qs, predicateNodeIterator, quad.Predicate))
|
||||
and.AddSubIterator(lto)
|
||||
return iterator.NewHasA(qs, and, out)
|
||||
|
|
@ -182,11 +182,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
}
|
||||
predFixed := qs.FixedIterator()
|
||||
predFixed.Add(qs.ValueOf(stringArgs[0]))
|
||||
subAnd := iterator.NewAnd()
|
||||
subAnd := iterator.NewAnd(qs)
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate))
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object))
|
||||
hasa := iterator.NewHasA(qs, subAnd, quad.Subject)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(hasa)
|
||||
and.AddSubIterator(subIt)
|
||||
it = and
|
||||
|
|
@ -202,11 +202,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
}
|
||||
predFixed := qs.FixedIterator()
|
||||
predFixed.Add(qs.ValueOf(stringArgs[0]))
|
||||
subAnd := iterator.NewAnd()
|
||||
subAnd := iterator.NewAnd(qs)
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate))
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Subject))
|
||||
hasa := iterator.NewHasA(qs, subAnd, quad.Object)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(hasa)
|
||||
and.AddSubIterator(subIt)
|
||||
it = and
|
||||
|
|
@ -220,11 +220,11 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
}
|
||||
predFixed := qs.FixedIterator()
|
||||
predFixed.Add(qs.ValueOf(stringArgs[0]))
|
||||
subAnd := iterator.NewAnd()
|
||||
subAnd := iterator.NewAnd(qs)
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, predFixed, quad.Predicate))
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed, quad.Object))
|
||||
hasa := iterator.NewHasA(qs, subAnd, quad.Subject)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(hasa)
|
||||
and.AddSubIterator(subIt)
|
||||
it = and
|
||||
|
|
@ -238,14 +238,14 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
}
|
||||
argIt := buildIteratorTree(firstArg.Object(), qs)
|
||||
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(subIt)
|
||||
and.AddSubIterator(argIt)
|
||||
it = and
|
||||
case "back":
|
||||
arg, _ := obj.Get("_gremlin_back_chain")
|
||||
argIt := buildIteratorTree(arg.Object(), qs)
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(subIt)
|
||||
and.AddSubIterator(argIt)
|
||||
it = and
|
||||
|
|
@ -254,7 +254,7 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
for _, name := range stringArgs {
|
||||
fixed.Add(qs.ValueOf(name))
|
||||
}
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(fixed)
|
||||
and.AddSubIterator(subIt)
|
||||
it = and
|
||||
|
|
@ -311,7 +311,7 @@ func buildIteratorTreeHelper(obj *otto.Object, qs graph.QuadStore, base graph.It
|
|||
toComplementIt := buildIteratorTree(firstArg.Object(), qs)
|
||||
notIt := iterator.NewNot(toComplementIt, allIt)
|
||||
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
and.AddSubIterator(subIt)
|
||||
and.AddSubIterator(notIt)
|
||||
it = and
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ func (q *Query) buildIteratorTreeInternal(query interface{}, path Path) (it grap
|
|||
}
|
||||
|
||||
func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path Path) (graph.Iterator, error) {
|
||||
it := iterator.NewAnd()
|
||||
it := iterator.NewAnd(q.ses.qs)
|
||||
it.AddSubIterator(q.ses.qs.NodesAllIterator())
|
||||
var err error
|
||||
err = nil
|
||||
|
|
@ -136,7 +136,7 @@ func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subAnd := iterator.NewAnd()
|
||||
subAnd := iterator.NewAnd(q.ses.qs)
|
||||
predFixed := q.ses.qs.FixedIterator()
|
||||
predFixed.Add(q.ses.qs.ValueOf(pred))
|
||||
subAnd.AddSubIterator(iterator.NewLinksTo(q.ses.qs, predFixed, quad.Predicate))
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ func buildIteratorTree(tree *peg.ExpressionTree, qs graph.QuadStore) graph.Itera
|
|||
return lto
|
||||
case "RootConstraint":
|
||||
constraintCount := 0
|
||||
and := iterator.NewAnd()
|
||||
and := iterator.NewAnd(qs)
|
||||
for _, c := range tree.Children {
|
||||
switch c.Name {
|
||||
case "NodeIdentifier":
|
||||
|
|
@ -232,7 +232,7 @@ func buildIteratorTree(tree *peg.ExpressionTree, qs graph.QuadStore) graph.Itera
|
|||
var hasa *iterator.HasA
|
||||
topLevelDir := quad.Subject
|
||||
subItDir := quad.Object
|
||||
subAnd := iterator.NewAnd()
|
||||
subAnd := iterator.NewAnd(qs)
|
||||
isOptional := false
|
||||
for _, c := range tree.Children {
|
||||
switch c.Name {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue