Move iterators into separate package

Also reduce API exposure and use standard library more - and fix bugs I
previously introduced in mongo.
This commit is contained in:
kortschak 2014-06-30 22:22:50 +09:30
parent 88be6bee37
commit 1768e593a8
62 changed files with 3240 additions and 3130 deletions

View file

@ -0,0 +1,118 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines one of the base iterators, the All iterator. Which, logically
// enough, represents all nodes or all links in the graph.
//
// This particular file is actually vestigial. It's up to the TripleStore to give
// us an All iterator that represents all things in the graph. So this is
// really the All iterator for the MemTripleStore. That said, it *is* one of
// the base iterators, and it helps just to see it here.
import (
"fmt"
"strings"
"github.com/google/cayley/graph"
)
// Int64 is an All iterator across a range of int64 values. Next() walks the
// range upwards starting at `min`, and Check() accepts values v with
// min <= v <= max.
type Int64 struct {
	Base
	max, min int64 // bounds of the range
	at int64 // cursor used by Next(); Reset() puts it back to min
}
// NewInt64 builds an Int64 iterator covering the range from min to max, with
// its cursor positioned on min.
func NewInt64(min, max int64) *Int64 {
	it := &Int64{
		max: max,
		min: min,
		at:  min,
	}
	BaseInit(&it.Base)
	return it
}
// Reset rewinds the iterator so that the following Next() starts again from
// the lower bound of the range.
func (it *Int64) Reset() {
	it.at = it.min
}
// Close is a no-op: an Int64 iterator holds nothing that needs releasing.
func (it *Int64) Close() {}
// Clone returns a fresh Int64 over the same range carrying the same tags.
// The copy starts at the beginning of the range; the current position of the
// receiver is not carried over.
func (it *Int64) Clone() graph.Iterator {
	dup := NewInt64(it.min, it.max)
	dup.CopyTagsFrom(it)
	return dup
}
// DebugString renders the iterator as its type name ("all") plus its tags,
// prefixed by `indent` spaces.
func (it *Int64) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	return fmt.Sprintf("%s(%s tags: %v)", pad, it.Type(), it.Tags())
}
// Next on an Int64 all iterator is a simple incrementing counter. It returns
// the next integer in the range and records it as the last result; once the
// range is used up it returns (nil, false).
//
// Exhaustion is detected by the cursor lying outside [min, max] rather than by
// a magic -1 value, so ranges that contain -1 are iterated completely and an
// empty range (min > max) yields nothing.
func (it *Int64) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	if it.at < it.min || it.at > it.max {
		return NextLogOut(it, nil, false)
	}
	val := it.at
	// Stepping past max moves the cursor out of range. If max is the largest
	// int64 the increment wraps to the smallest int64, which is also out of
	// range unless the iterator spans every int64 value.
	it.at++
	it.Last = val
	return NextLogOut(it, val, true)
}
// Size reports the number of values in the range, computed as
// max - min + 1. The figure is always flagged as exact.
func (it *Int64) Size() (int64, bool) {
	return it.max - it.min + 1, true
}
// Check reports whether the passed value lies within the range
// (min <= v <= max). On success the value is recorded as the last result.
//
// A value that is not an int64 cannot be in the range, so it is rejected with
// false instead of panicking on the type assertion.
func (it *Int64) Check(tsv graph.TSVal) bool {
	CheckLogIn(it, tsv)
	v, ok := tsv.(int64)
	if !ok {
		return CheckLogOut(it, tsv, false)
	}
	if v < it.min || v > it.max {
		return CheckLogOut(it, v, false)
	}
	it.Last = v
	return CheckLogOut(it, v, true)
}
// Type returns "all". This is important, as it puts the iterator in the class
// of "all" iterators, which the And optimizer treats as not useful.
func (it *Int64) Type() string { return "all" }
// Optimize returns the iterator itself and false: there is nothing to
// optimize about this little iterator.
func (it *Int64) Optimize() (graph.Iterator, bool) { return it, false }
// GetStats reports unit cost for both Check() and Next(), and a size equal to
// the size of the range.
func (it *Int64) GetStats() *graph.IteratorStats {
	size, _ := it.Size()
	stats := new(graph.IteratorStats)
	stats.CheckCost = 1
	stats.NextCost = 1
	stats.Size = size
	return stats
}

View file

@ -0,0 +1,248 @@
// Defines the And iterator, one of the base iterators. And requires no
// knowledge of the constituent TripleStore; its sole purpose is to act as an
// intersection operator across the subiterators it is given. If one iterator
// contains [1,3,5] and another [2,3,4] -- then And is an iterator that
// 'contains' [3]
//
// It accomplishes this in one of two ways. If it is a Next()ed iterator (that
// is, it is a top level iterator, or on the "Next() path"), then it will Next()
// its primary iterator (helpfully, and.primaryIt) and Check() the resultant
// value against its other iterators. If it matches all of them, then it
// returns that value. Otherwise, it repeats the process.
//
// If it's on a Check() path, it merely Check()s every iterator, and returns the
// logical AND of each result.
package iterator
import (
"fmt"
"strings"
"github.com/google/cayley/graph"
)
// And is the intersection iterator. It consists of a Base and a number of
// subiterators, the primary of which will be Next()ed if Next() is called.
type And struct {
	Base
	internalIterators []graph.Iterator // every subiterator except the primary, in the order added
	itCount int // number of subiterators added through AddSubIterator
	primaryIt graph.Iterator // first subiterator added; the one driven by Next()
	checkList []graph.Iterator // when non-nil, the list Check() walks instead of primary + internal
}
// NewAnd returns an empty And iterator with room reserved for 20
// subiterators and no check list.
func NewAnd() *And {
	and := &And{
		internalIterators: make([]graph.Iterator, 0, 20),
	}
	BaseInit(&and.Base)
	return and
}
// Reset resets every subiterator and drops the check list.
//
// An And that has not been given any subiterator has no primary iterator; that
// case is tolerated rather than dereferencing nil.
func (it *And) Reset() {
	if it.primaryIt != nil {
		it.primaryIt.Reset()
	}
	for _, sub := range it.internalIterators {
		sub.Reset()
	}
	it.checkList = nil
}
// Clone returns a new And built from clones of every subiterator, in the same
// order (primary first), carrying the same tags. If the receiver had a check
// list, one is rebuilt for the clone.
//
// An And without any subiterator clones to another empty And instead of
// dereferencing its nil primary iterator.
func (it *And) Clone() graph.Iterator {
	and := NewAnd()
	if it.primaryIt != nil {
		and.AddSubIterator(it.primaryIt.Clone())
	}
	and.CopyTagsFrom(it)
	for _, sub := range it.internalIterators {
		and.AddSubIterator(sub.Clone())
	}
	if it.checkList != nil {
		and.optimizeCheck()
	}
	return and
}
// GetSubIterators returns a newly allocated slice of the subiterators, in
// order (primary iterator first).
//
// When no subiterator has been added the result is empty. Returning a slice
// holding a nil primary would make callers such as Optimize() invoke methods
// on a nil iterator.
func (it *And) GetSubIterators() []graph.Iterator {
	if it.primaryIt == nil {
		return nil
	}
	iters := make([]graph.Iterator, len(it.internalIterators)+1)
	iters[0] = it.primaryIt
	copy(iters[1:], it.internalIterators)
	return iters
}
// TagResults overrides Base.TagResults: it adds the And's own results and
// then recurses into the primary iterator (if any) and the remaining
// subiterators.
func (it *And) TagResults(out *map[string]graph.TSVal) {
	it.Base.TagResults(out)
	if primary := it.primaryIt; primary != nil {
		primary.TagResults(out)
	}
	for i := range it.internalIterators {
		it.internalIterators[i].TagResults(out)
	}
}
// DEPRECATED GetResultTree returns the ResultTree for this iterator: a node
// for the And's last result with one subtree per subiterator, primary first.
func (it *And) GetResultTree() *graph.ResultTree {
	root := graph.NewResultTree(it.LastResult())
	root.AddSubtree(it.primaryIt.GetResultTree())
	for i := range it.internalIterators {
		root.AddSubtree(it.internalIterators[i].GetResultTree())
	}
	return root
}
// DebugString renders the And, its tags, its primary iterator and its other
// subiterators as an indented, multi-line description.
func (it *And) DebugString(indent int) string {
	inner := strings.Repeat(" ", indent+2)

	// Numbered dump of the non-primary subiterators.
	var others string
	for i, sub := range it.internalIterators {
		others += inner + fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4))
	}

	// Tags are listed as a ';'-terminated sequence.
	var tags string
	for _, tag := range it.Tags() {
		tags += tag + ";"
	}

	outer := strings.Repeat(" ", indent)
	return fmt.Sprintf("%s(%s %d\n%stags:%s\n%sprimary_it:\n%s\n%sother_its:\n%s)",
		outer,
		it.Type(),
		it.GetUid(),
		inner,
		tags,
		inner,
		it.primaryIt.DebugString(indent+4),
		inner,
		others)
}
// AddSubIterator adds a subiterator to this And iterator.
//
// The first iterator that is added becomes the primary iterator. This is
// important. Calling Optimize() is the way to change the order based on
// subiterator statistics. Without Optimize(), the order added is the order
// used.
func (it *And) AddSubIterator(sub graph.Iterator) {
	if it.itCount > 0 {
		it.internalIterators = append(it.internalIterators, sub)
	} else {
		it.primaryIt = sub
	}
	it.itCount++
}
// Next returns the next value from the And iterator. Because the And is the
// intersection of its subiterators, it must choose one subiterator to produce a
// candidate, and check this value against the other subiterators. A productive
// choice of primary iterator is therefore very important.
//
// Candidates are pulled from the primary iterator until one passes every
// other subiterator (it is then recorded as the last result) or the primary
// is exhausted.
func (it *And) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	// The loop only leaves through a return, so no trailing statement is
	// needed (or reachable) after it.
	for {
		curr, exists := it.primaryIt.Next()
		if !exists {
			return NextLogOut(it, nil, false)
		}
		if it.checkSubIts(curr) {
			it.Last = curr
			return NextLogOut(it, curr, true)
		}
	}
}
// checkSubIts checks a value against the non-primary iterators, in order,
// stopping at the first one that rejects it. With no non-primary iterators
// the value is accepted.
func (it *And) checkSubIts(val graph.TSVal) bool {
	for _, sub := range it.internalIterators {
		if !sub.Check(val) {
			return false
		}
	}
	return true
}
// checkCheckList checks a value against every iterator in the check list, in
// list order, stopping at the first rejection. If all accept, the value is
// recorded as the last result.
func (it *And) checkCheckList(val graph.TSVal) bool {
	for _, c := range it.checkList {
		if !c.Check(val) {
			return CheckLogOut(it, val, false)
		}
	}
	it.Last = val
	return CheckLogOut(it, val, true)
}
// Check tests a value against the entire iterator. When a check list exists
// it is used; otherwise the primary iterator is consulted first, followed by
// the remaining subiterators in order. A value accepted by all of them is
// recorded as the last result.
func (it *And) Check(val graph.TSVal) bool {
	CheckLogIn(it, val)
	if it.checkList != nil {
		return it.checkCheckList(val)
	}
	if !it.primaryIt.Check(val) || !it.checkSubIts(val) {
		return CheckLogOut(it, val, false)
	}
	it.Last = val
	return CheckLogOut(it, val, true)
}
// Size returns the approximate size of the And iterator. Because we're
// dealing with an intersection, the largest we can be is the size of the
// smallest subiterator, and that is the heuristic used. The result is flagged
// exact only if every subiterator reports an exact size. Better heuristics
// welcome.
func (it *And) Size() (int64, bool) {
	size, exact := it.primaryIt.Size()
	for _, sub := range it.internalIterators {
		subSize, subExact := sub.Size()
		if subSize < size {
			size = subSize
		}
		if !subExact {
			exact = false
		}
	}
	return size, exact
}
// NextResult passes the call on to the subiterators. An And has no
// NextResult of its own -- that is, there are no other values which satisfy
// our previous result that are not the result itself -- but its subiterators
// might. The primary is asked first, then the others in order; the first one
// to report true ends the search.
func (it *And) NextResult() bool {
	if it.primaryIt.NextResult() {
		return true
	}
	for i := range it.internalIterators {
		if it.internalIterators[i].NextResult() {
			return true
		}
	}
	return false
}
// cleanUp performs and-specific cleanup, of which there currently is none.
// It does not touch the subiterators.
func (it *And) cleanUp() {}
// Close closes this iterator and, by extension, its subiterators.
// Close should be idempotent, and it follows that if its subiterators
// follow this contract, the And follows the contract.
//
// An And that never received a subiterator has no primary iterator; closing
// it is a no-op rather than a nil dereference.
func (it *And) Close() {
	it.cleanUp()
	if it.primaryIt != nil {
		it.primaryIt.Close()
	}
	for _, sub := range it.internalIterators {
		sub.Close()
	}
}
// Type identifies this iterator as an "and" iterator.
func (it *And) Type() string { return "and" }

View file

@ -0,0 +1,317 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"sort"
"github.com/google/cayley/graph"
)
// Perhaps the most tricky file in this entire module. Really a method on the
// And, but important enough to deserve its own file.
//
// Calling Optimize() on an And iterator, like any iterator, requires that we
// preserve the underlying meaning. However, the And has many choices, namely,
// which one of its subiterators will be the branch that does the Next()ing,
// and which ordering of the remaining iterators is the most efficient. In
// short, this is where a lot of the query optimization happens, and there are
// many wins to be had here, as well as many bad bugs. The worst class of bug
// changes the meaning of the query. The second worst class makes things really
// slow.
//
// The good news is this: If Optimize() is never called (turned off, perhaps) we can
// be sure the results are as good as the query language called for.
//
// In short, tread lightly.
// Optimize optimizes the And by picking the most efficient way to Next() and
// Check() its subiterators. For SQL fans, this is equivalent to JOIN.
//
// It returns either a single iterator that can stand in for the whole And, or
// a new And over the optimized, reordered subiterators. In both cases the
// second return value is true.
func (it *And) Optimize() (graph.Iterator, bool) {
	// First, let's get the slice of iterators, in order (first one is Next()ed,
	// the rest are Check()ed)
	old := it.GetSubIterators()
	// And call Optimize() on our subtree, replacing each one in the order we
	// found them. `its` holds, for every subiterator, either the replacement
	// its Optimize() produced or a clone of the original.
	its := optimizeSubIterators(old)
	// Close the original iterators. Everything in `its` is a replacement or a
	// clone (they ought to close themselves, but Close() is idempotent, so
	// this just protects against any machinations).
	closeIteratorList(old, nil)
	// If we can find only one subiterator which is equivalent to this whole and,
	// we can replace the And...
	out := it.optimizeReplacement(its)
	if out != nil {
		// ...Move the tags to the replacement...
		moveTagsTo(out, it)
		// ...Close everyone except `out`, our replacement...
		closeIteratorList(its, out)
		// ...And return it.
		return out, true
	}
	// And now, without changing any of the iterators, we reorder them. `its` is
	// now a permutation of itself, but the contents are unchanged.
	its = optimizeOrder(its)
	// Okay! At this point we have an optimized order.
	// The easiest thing to do at this point is merely to create a new And iterator
	// and replace ourselves with our (reordered, optimized) clone.
	newAnd := NewAnd()
	// Add the subiterators in order.
	for _, sub := range its {
		newAnd.AddSubIterator(sub)
	}
	// Copy the tags hanging on us (like any good replacement).
	newAnd.CopyTagsFrom(it)
	// Build the cost-sorted list the new And uses on the Check() path.
	newAnd.optimizeCheck()
	// And clean up ourselves without calling Close(): our original
	// subiterators were already closed above, and the ones in `its` now
	// belong to the new And.
	it.cleanUp()
	return newAnd, true
}
// closeIteratorList closes every iterator in `its` apart from the one equal
// to `except`. Passing a nil `except` closes the whole list.
func closeIteratorList(its []graph.Iterator, except graph.Iterator) {
	for i := range its {
		if its[i] == except {
			continue
		}
		its[i].Close()
	}
}
// optimizeReplacement looks for a single iterator that is a valid
// replacement for the whole And, returning nil when there is none.
func (*And) optimizeReplacement(its []graph.Iterator) graph.Iterator {
	switch len(its) {
	case 0:
		// If we were created with no SubIterators, we're as good as Null.
		return &Null{}
	case 1:
		// When there's only one iterator, there's only one choice.
		return its[0]
	}
	// If any of our subiterators, post-optimization, are also Null, then
	// there's no point in continuing the branch, we will have no results
	// and we are null as well.
	if hasAnyNullIterators(its) {
		return &Null{}
	}
	// If we have exactly one useful iterator, use that; otherwise this
	// yields nil.
	return hasOneUsefulIterator(its)
}
// optimizeOrder takes a list of iterators and returns a list containing the
// same iterators in a new order: the iterator with the lowest projected total
// cost first (it will be the one that is Next()ed), then the remaining
// Nextable iterators in their original relative order, and finally the
// iterators that cannot be Next()ed.
//
// If none of the iterators is Nextable, no "best" entry is emitted and the
// result is simply the input order; a nil iterator is never placed in the
// result.
func optimizeOrder(its []graph.Iterator) []graph.Iterator {
	var (
		// bad contains iterators that can't be (efficiently) nexted, such as
		// "optional" or "not". Separate them out and tack them on at the end.
		out, bad []graph.Iterator
		best     graph.Iterator
		bestCost = int64(1 << 62)
	)

	// Find the iterator with the projected "best" total cost.
	// Total cost is defined as the Next()ed iterator's cost to Next() out
	// all of its contents, and to Check() each of those against everyone
	// else.
	for _, it := range its {
		if !it.Nextable() {
			bad = append(bad, it)
			continue
		}
		rootStats := it.GetStats()
		cost := rootStats.NextCost
		for _, f := range its {
			if !f.Nextable() || f == it {
				continue
			}
			cost += f.GetStats().CheckCost
		}
		cost *= rootStats.Size
		// The first Nextable iterator is always accepted, so a candidate
		// exists even when every projected cost reaches the initial bound.
		if best == nil || cost < bestCost {
			best = it
			bestCost = cost
		}
	}

	// TODO(barakmich): Optimization of order need not stop here. Picking a smart
	// Check() order based on probability of getting a false Check() first is
	// useful (fail faster).

	// Put the best iterator (the one we wish to Next()) at the front, if
	// there is one...
	if best != nil {
		out = append(out, best)
	}
	// ... push everyone else after...
	for _, it := range its {
		if !it.Nextable() {
			continue
		}
		if it != best {
			out = append(out, it)
		}
	}
	// ...and finally, the difficult children on the end.
	return append(out, bad...)
}
// byCost implements sort.Interface, ordering iterators by ascending
// CheckCost as reported by GetStats().
type byCost []graph.Iterator

func (c byCost) Len() int { return len(c) }

func (c byCost) Less(i, j int) bool {
	left, right := c[i].GetStats(), c[j].GetStats()
	return left.CheckCost < right.CheckCost
}

func (c byCost) Swap(i, j int) {
	tmp := c[i]
	c[i] = c[j]
	c[j] = tmp
}
// optimizeCheck builds the alternate check list: every subiterator (primary
// included) sorted by ascending CheckCost.
func (it *And) optimizeCheck() {
	// GetSubIterators allocates, so sorting its result in place is currently safe.
	// TODO(kortschak) Reuse it.checkList if possible.
	// This involves providing GetSubIterators with a slice to fill.
	// Generally this is a worthwhile thing to do in other places as well.
	ordered := it.GetSubIterators()
	sort.Sort(byCost(ordered))
	it.checkList = ordered
}
// getSubTags returns the set of tags found on any of the subiterators plus
// the tags on the And itself.
//
// It exists for the case where the And is replaced by a single iterator: the
// tags of the iterators that are not going to stay still need to be carried
// over to the replacement.
func (it *And) getSubTags() map[string]struct{} {
	present := struct{}{}
	set := make(map[string]struct{})
	for _, sub := range it.GetSubIterators() {
		for _, name := range sub.Tags() {
			set[name] = present
		}
	}
	for _, name := range it.Tags() {
		set[name] = present
	}
	return set
}
// moveTagsTo collects the tags of src and of all of src's subiterators and
// adds to dst every one that dst does not already carry.
func moveTagsTo(dst graph.Iterator, src *And) {
	pending := src.getSubTags()
	// Deleting an absent key is a no-op, so no lookup is needed first.
	for _, have := range dst.Tags() {
		delete(pending, have)
	}
	for tag := range pending {
		dst.AddTag(tag)
	}
}
// optimizeSubIterators calls Optimize() on every iterator in the list and
// returns a single list of the same length and order. Each entry is the
// replacement produced by Optimize() when it reported a change, or a Clone()
// of the original when it did not.
func optimizeSubIterators(its []graph.Iterator) []graph.Iterator {
	var optimized []graph.Iterator
	for _, it := range its {
		next, changed := it.Optimize()
		if !changed {
			next = it.Clone()
		}
		optimized = append(optimized, next)
	}
	return optimized
}
// hasAnyNullIterators reports whether any iterator in the list has type
// "null".
func hasAnyNullIterators(its []graph.Iterator) bool {
	for i := range its {
		if its[i].Type() == "null" {
			return true
		}
	}
	return false
}
// hasOneUsefulIterator returns the only "useful" iterator in the list, or
// nil if there is not exactly one.
//
// There are two "not-useful" iterators -- namely "null" which returns
// nothing, and "all" which returns everything. Particularly, we want
// to see if we're intersecting with a bunch of "all" iterators, and,
// if we are, then we have only one useful iterator.
func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
	var (
		useful graph.Iterator
		count  int
	)
	for _, it := range its {
		kind := it.Type()
		if kind == "optional" {
			// Optional is weird -- it's not useful, but we can't optimize
			// away from it. Therefore, we skip this optimization
			// if we see one.
			return nil
		}
		if kind == "null" || kind == "all" {
			continue
		}
		count++
		useful = it
	}
	if count != 1 {
		return nil
	}
	return useful
}
// GetStats lives alongside the And optimizer because it may in the future
// return different statistics based on how the And is optimized. For now,
// however, it's pretty static: the Next cost is the primary's Next cost plus
// every other subiterator's Check cost, the Check cost is the sum of all
// Check costs, and the size is the smallest subiterator size.
func (it *And) GetStats() *graph.IteratorStats {
	primary := it.primaryIt.GetStats()
	total := graph.IteratorStats{
		CheckCost: primary.CheckCost,
		NextCost:  primary.NextCost,
		Size:      primary.Size,
	}
	for _, sub := range it.internalIterators {
		s := sub.GetStats()
		total.NextCost += s.CheckCost
		total.CheckCost += s.CheckCost
		if s.Size < total.Size {
			total.Size = s.Size
		}
	}
	return &total
}

View file

@ -0,0 +1,110 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Tests relating to methods in and-iterator-optimize. Many are pretty simplistic, but
// nonetheless cover a lot of basic cases.
import (
"reflect"
"sort"
"testing"
)
// TestIteratorPromotion checks that an And over an "all" iterator and a fixed
// iterator optimizes down to the fixed iterator, and that the replacement
// ends up carrying the tags of both subiterators and of the And.
func TestIteratorPromotion(t *testing.T) {
	allIt := NewInt64(1, 3)
	fixedIt := newFixed()
	fixedIt.AddValue(3)

	and := NewAnd()
	and.AddSubIterator(allIt)
	and.AddSubIterator(fixedIt)
	allIt.AddTag("a")
	fixedIt.AddTag("b")
	and.AddTag("c")

	optimized, changed := and.Optimize()
	if !changed {
		t.Error("Iterator didn't optimize")
	}
	if optimized.Type() != "fixed" {
		t.Error("Expected fixed iterator")
	}

	got := optimized.Tags()
	sort.Strings(got)
	want := []string{"a", "b", "c"}
	if !reflect.DeepEqual(got, want) {
		t.Fatal("Tags don't match")
	}
}
func TestNullIteratorAnd(t *testing.T) {
all := NewInt64(1, 3)
null := NewNull()
a := NewAnd()
a.AddSubIterator(all)
a.AddSubIterator(null)
newIt, changed := a.Optimize()
if !changed {
t.Error("Didn't change")
}
if newIt.Type() != "null" {
t.Error("Expected null iterator, got ", newIt.Type())
}
}
// TestReorderWithTag checks that Optimize() moves the smaller iterator to the
// front and that each subiterator keeps its own tag through the reordering.
func TestReorderWithTag(t *testing.T) {
	small := NewInt64(100, 300)
	small.AddTag("good")
	large := NewInt64(1, 30000)
	large.AddTag("slow")

	and := NewAnd()
	// Make the large iterator the default (primary) iterator.
	and.AddSubIterator(large)
	and.AddSubIterator(small)

	optimized, changed := and.Optimize()
	if !changed {
		t.Error("Expected new iterator")
	}

	// Collect the tags in subiterator order, primary first.
	var got []string
	for _, sub := range optimized.GetSubIterators() {
		got = append(got, sub.Tags()...)
	}
	want := []string{"good", "slow"}
	if !reflect.DeepEqual(want, got) {
		t.Fatal("Tags don't match")
	}
}
func TestAndStatistics(t *testing.T) {
all := NewInt64(100, 300)
all.AddTag("good")
all2 := NewInt64(1, 30000)
all2.AddTag("slow")
a := NewAnd()
// Make all2 the default iterator
a.AddSubIterator(all2)
a.AddSubIterator(all)
stats1 := a.GetStats()
newIt, changed := a.Optimize()
if !changed {
t.Error("Didn't optimize")
}
stats2 := newIt.GetStats()
if stats2.NextCost > stats1.NextCost {
t.Error("And didn't optimize. Next cost old ", stats1.NextCost, "and new ", stats2.NextCost)
}
}

View file

@ -0,0 +1,149 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"testing"
"github.com/google/cayley/graph"
)
// TestTag makes sure that tags work on the And: a tag on the subiterator and
// a tag on the And itself must both be reported with the value produced by
// Next().
func TestTag(t *testing.T) {
	fixed := newFixed()
	fixed.AddValue(234)
	fixed.AddTag("foo")

	and := NewAnd()
	and.AddSubIterator(fixed)
	and.AddTag("bar")

	// Tagging the And must not leak onto the subiterator.
	subTags := fixed.Tags()
	if n := len(subTags); n != 1 {
		t.Errorf("Expected length 1, got %d", n)
	}
	if subTags[0] != "foo" {
		t.Errorf("Cannot get tag back, got %s", subTags[0])
	}

	got, ok := and.Next()
	if !ok {
		t.Errorf("And did not next")
	}
	if got != 234 {
		t.Errorf("Unexpected value")
	}

	results := make(map[string]graph.TSVal)
	and.TagResults(&results)
	if results["bar"] != 234 {
		t.Errorf("no bar tag")
	}
	if results["foo"] != 234 {
		t.Errorf("no foo tag")
	}
}
// TestAndAndFixedIterators does a simple intersection of fixed values.
func TestAndAndFixedIterators(t *testing.T) {
	left := newFixed()
	for _, v := range []int{1, 2, 3, 4} {
		left.AddValue(v)
	}
	right := newFixed()
	for _, v := range []int{3, 4, 5} {
		right.AddValue(v)
	}

	and := NewAnd()
	and.AddSubIterator(left)
	and.AddSubIterator(right)

	// Should be as big as smallest subiterator
	size, accurate := and.Size()
	if size != 3 {
		t.Error("Incorrect size")
	}
	if !accurate {
		t.Error("not accurate")
	}

	val, ok := and.Next()
	if !ok || val != 3 {
		t.Error("Incorrect first value")
	}
	val, ok = and.Next()
	if !ok || val != 4 {
		t.Error("Incorrect second value")
	}
	if _, ok = and.Next(); ok {
		t.Error("Too many values")
	}
}
// TestNonOverlappingFixedIterators checks that, when there is no
// intersection, the size is still reported as that of the smallest
// subiterator, but there is nothing to Next().
func TestNonOverlappingFixedIterators(t *testing.T) {
	left := newFixed()
	for _, v := range []int{1, 2, 3, 4} {
		left.AddValue(v)
	}
	right := newFixed()
	for _, v := range []int{5, 6, 7} {
		right.AddValue(v)
	}

	and := NewAnd()
	and.AddSubIterator(left)
	and.AddSubIterator(right)

	// Should be as big as smallest subiterator
	size, accurate := and.Size()
	if size != 3 {
		t.Error("Incorrect size")
	}
	if !accurate {
		t.Error("not accurate")
	}

	if _, ok := and.Next(); ok {
		t.Error("Too many values")
	}
}
// TestAllIterators intersects two overlapping Int64 ranges and expects
// exactly the shared values, in order.
//
// `ok` is tested before the value is inspected, and the value is compared as
// an interface rather than through a type assertion. A failed Next() (which
// returns a nil value) is therefore reported as a test failure instead of
// panicking.
func TestAllIterators(t *testing.T) {
	all1 := NewInt64(1, 5)
	all2 := NewInt64(4, 10)
	and := NewAnd()
	and.AddSubIterator(all2)
	and.AddSubIterator(all1)

	val, ok := and.Next()
	if !ok || val != int64(4) {
		t.Error("Incorrect first value")
	}
	val, ok = and.Next()
	if !ok || val != int64(5) {
		t.Error("Incorrect second value")
	}
	if _, ok = and.Next(); ok {
		t.Error("Too many values")
	}
}

View file

@ -0,0 +1,157 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines one of the base iterators, the Fixed iterator. A fixed iterator is quite simple; it
// contains an explicit fixed array of values.
//
// A fixed iterator requires an Equality function to be passed to it, by reason that graph.TSVal, the
// opaque Triple store value, may not answer to ==.
import (
"fmt"
"strings"
"github.com/google/cayley/graph"
)
// A Fixed iterator consists of its values, an index (where it is in the
// process of Next()ing) and an equality function.
type Fixed struct {
	Base
	values []graph.TSVal // the stored values, in insertion order
	lastIndex int // index into values of the element the next Next() returns
	cmp Equality // used by Check() to compare a stored value with a candidate
}
// Equality is the signature of an equality function: it reports whether two
// triple store values are to be considered equal.
type Equality func(a, b graph.TSVal) bool
// BasicEquality is an equality function of purely ==, which works for native
// types.
func BasicEquality(a, b graph.TSVal) bool {
	return a == b
}
// newFixed creates a new, empty Fixed iterator that compares values with
// BasicEquality, i.e. plain ==.
func newFixed() *Fixed {
	return NewFixedIteratorWithCompare(BasicEquality)
}
// NewFixedIteratorWithCompare creates a new, empty Fixed iterator that uses
// the given comparator in Check(). Room for 20 values is reserved up front.
func NewFixedIteratorWithCompare(compareFn Equality) *Fixed {
	it := &Fixed{
		values:    make([]graph.TSVal, 0, 20),
		lastIndex: 0,
		cmp:       compareFn,
	}
	BaseInit(&it.Base)
	return it
}
// Reset rewinds the iterator so that the following Next() returns the first
// stored value again.
func (it *Fixed) Reset() {
	it.lastIndex = 0
}
// Close is a no-op for a Fixed iterator.
func (it *Fixed) Close() {}
// Clone returns a new Fixed iterator with the same comparator, the same
// values in the same order, and the same tags. The copy starts at the first
// value; the receiver's position is not carried over.
func (it *Fixed) Clone() graph.Iterator {
	dup := NewFixedIteratorWithCompare(it.cmp)
	for i := range it.values {
		dup.AddValue(it.values[i])
	}
	dup.CopyTagsFrom(it)
	return dup
}
// AddValue appends a value to the iterator. Duplicates are not detected.
// TODO(barakmich): This ought to be a set someday, disallowing repeated values.
func (it *Fixed) AddValue(v graph.TSVal) {
	it.values = append(it.values, v)
}
// DebugString prints some information about the iterator: its type, its
// fixed tags, the number of stored values and the first value (empty when
// there are none), prefixed by `indent` spaces.
func (it *Fixed) DebugString(indent int) string {
	value := ""
	if len(it.values) > 0 {
		value = fmt.Sprint(it.values[0])
	}
	// value is already a string, so it is formatted with %s; %d would print
	// "%!d(string=...)". The fixed tags use %v so they render whatever type
	// they hold.
	return fmt.Sprintf("%s(%s tags: %v Size: %d id0: %s)",
		strings.Repeat(" ", indent),
		it.Type(),
		it.FixedTags(),
		len(it.values),
		value,
	)
}
// Type identifies this iterator as a "fixed" iterator.
func (it *Fixed) Type() string {
	return "fixed"
}
// Check reports whether the passed value is equal, according to the
// iterator's comparator, to one of the stored values. The first matching
// stored value is recorded as the last result.
func (it *Fixed) Check(v graph.TSVal) bool {
	// Could be optimized by keeping it sorted or using a better datastructure.
	// However, for fixed iterators, which are by definition kind of tiny, this
	// isn't a big issue.
	CheckLogIn(it, v)
	for i := range it.values {
		stored := it.values[i]
		if !it.cmp(stored, v) {
			continue
		}
		it.Last = stored
		return CheckLogOut(it, v, true)
	}
	return CheckLogOut(it, v, false)
}
// Next returns the next stored value from the iterator and records it as the
// last result, or (nil, false) once every value has been returned.
func (it *Fixed) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	idx := it.lastIndex
	if idx == len(it.values) {
		return NextLogOut(it, nil, false)
	}
	current := it.values[idx]
	it.Last = current
	it.lastIndex = idx + 1
	return NextLogOut(it, current, true)
}
// Optimize for a Fixed iterator is simple. If the iterator holds exactly one
// value and that value is nil, it is replaced by a Null iterator (so that
// other iterators upstream can treat this as null); otherwise there is no
// optimization and the iterator itself is returned unchanged.
// NOTE(review): an iterator with no values at all is left as is -- confirm
// whether that case should also become Null.
func (it *Fixed) Optimize() (graph.Iterator, bool) {
	holdsOnlyNil := len(it.values) == 1 && it.values[0] == nil
	if !holdsOnlyNil {
		return it, false
	}
	return &Null{}, true
}
// Size is the number of values stored. It is always reported as exact.
func (it *Fixed) Size() (int64, bool) {
	return int64(len(it.values)), true
}
// GetStats reports the number of stored values as the Check cost, the Next
// cost and the size alike. As we right now have to scan the entire list,
// costs are stated as linear in the size; a better data structure could
// remove these limits.
func (it *Fixed) GetStats() *graph.IteratorStats {
	n := int64(len(it.values))
	stats := new(graph.IteratorStats)
	stats.CheckCost = n
	stats.NextCost = n
	stats.Size = n
	return stats
}

View file

@ -0,0 +1,223 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines one of the base iterators, the HasA iterator. The HasA takes a
// subiterator of links, and acts as an iterator of nodes in the given
// direction. The name comes from the idea that a "link HasA subject" or a "link
// HasA predicate".
//
// HasA is weird in that it may return the same value twice if on the Next()
// path. That's okay -- in reality, it can be viewed as returning the value for
// a new triple, but to make logic much simpler, here we have the HasA.
//
// Likewise, it's important to think about Check()ing a HasA. When given a
// value to check, it means "Check all predicates that have this value for your
// direction against the subiterator." This would imply that there's more than
// one possibility for the same Check()ed value. While we could return the
// number of options, it's simpler to return one, and then call NextResult()
// enough times to enumerate the options. (In fact, one could argue that the
// raison d'etre for NextResult() is this iterator).
//
// Alternatively, can be seen as the dual of the LinksTo iterator.
import (
"fmt"
"strings"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
)
// A HasA consists of a reference back to the graph.TripleStore that it references,
// a primary subiterator, a direction in which the triples for that subiterator point,
// and a temporary holder for the iterator generated on Check().
type HasA struct {
	Base
	ts graph.TripleStore // store asked for names and for triple iterators
	primaryIt graph.Iterator // the subiterator of links
	dir graph.Direction // direction this HasA stands for
	resultIt graph.Iterator // iterator opened by the most recent Check(); nil before the first one
}
// NewHasA constructs a new HasA iterator, given the triple store, the triple
// subiterator, and the triple direction for which it stands.
func NewHasA(ts graph.TripleStore, subIt graph.Iterator, d graph.Direction) *HasA {
	hasa := &HasA{
		ts:        ts,
		primaryIt: subIt,
		dir:       d,
	}
	BaseInit(&hasa.Base)
	return hasa
}
// GetSubIterators returns a one-element slice holding our sole subiterator.
func (it *HasA) GetSubIterators() []graph.Iterator {
	return []graph.Iterator{it.primaryIt}
}
// Reset resets the primary subiterator and closes the result iterator left
// over from a previous Check() or Next(), if there is one.
func (it *HasA) Reset() {
	it.primaryIt.Reset()
	if res := it.resultIt; res != nil {
		res.Close()
	}
}
// Clone returns a new HasA over a clone of the primary subiterator, with the
// same store and direction, carrying a copy of this iterator's tags.
func (it *HasA) Clone() graph.Iterator {
	dup := NewHasA(it.ts, it.primaryIt.Clone(), it.dir)
	dup.CopyTagsFrom(it)
	return dup
}
// Direction returns the direction this HasA extracts from each link.
func (it *HasA) Direction() graph.Direction {
	return it.dir
}
// Optimize forwards the Optimize() call to the subiterator. If the subiterator
// was replaced by a Null iterator, that Null iterator is returned in place of
// the HasA (there are no triples that have any directions). Otherwise the
// HasA keeps the possibly replaced subiterator and reports no change.
func (it *HasA) Optimize() (graph.Iterator, bool) {
	optimized, replaced := it.primaryIt.Optimize()
	if !replaced {
		return it, false
	}
	it.primaryIt = optimized
	if optimized.Type() == "null" {
		return optimized, true
	}
	return it, false
}
// TagResults records this iterator's own tags in out, then lets the primary
// subiterator add its tags to the same map.
func (it *HasA) TagResults(out *map[string]graph.TSVal) {
	it.Base.TagResults(out)
	sub := it.primaryIt
	sub.TagResults(out)
}
// GetResultTree returns a ResultTree rooted at the last result, with the
// primary subiterator's tree attached as a subtree.
//
// DEPRECATED.
func (it *HasA) GetResultTree() *graph.ResultTree {
	root := graph.NewResultTree(it.LastResult())
	root.AddSubtree(it.primaryIt.GetResultTree())
	return root
}
// DebugString renders this iterator (type, uid, tags and direction) followed
// by the subiterator's debug string, indented four spaces deeper.
func (it *HasA) DebugString(indent int) string {
	tagList := ""
	for _, tag := range it.Tags() {
		tagList += tag + ";"
	}
	pad := strings.Repeat(" ", indent)
	child := it.primaryIt.DebugString(indent + 4)
	return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", pad, it.Type(), it.GetUid(), tagList, it.dir, child)
}
// Check tests val against the internal iterator. It asks the triple store for
// an iterator of "triples that have `val` in our direction" (closing any
// iterator left from a previous call first) and then looks for a triple from
// it that the subiterator accepts.
func (it *HasA) Check(val graph.TSVal) bool {
	CheckLogIn(it, val)
	if glog.V(4) {
		glog.V(4).Infoln("Id is", it.ts.GetNameFor(val))
	}
	// TODO(barakmich): Optimize this
	if prev := it.resultIt; prev != nil {
		prev.Close()
	}
	it.resultIt = it.ts.GetTripleIterator(it.dir, val)
	matched := it.GetCheckResult()
	return CheckLogOut(it, val, matched)
}
// GetCheckResult is shared code between Check() and NextResult(). It advances
// the result iterator (a triple iterator based on the last checked value) until
// it finds a triple the subiterator accepts. On a match it stores the triple's
// value in our direction as the last result and returns true; it returns false
// once the result iterator is exhausted.
//
// It also returns false when no result iterator exists yet, which happens if
// NextResult() is called before any Check() or Next(); without this guard that
// call would dereference a nil iterator.
func (it *HasA) GetCheckResult() bool {
	if it.resultIt == nil {
		return false
	}
	for {
		linkVal, ok := it.resultIt.Next()
		if !ok {
			return false
		}
		if glog.V(4) {
			glog.V(4).Infoln("Triple is", it.ts.GetTriple(linkVal))
		}
		if it.primaryIt.Check(linkVal) {
			it.Last = it.ts.GetTripleDirection(linkVal, it.dir)
			return true
		}
	}
}
// NextResult gets the next result that matches this branch.
//
// Order is important here. If the subiterator has a NextResult, there is
// nothing to do: a next result exists and we must not move forward. Only when
// the subiterator has none do we pull the next result from our last Check().
// The end of NextResult() thus bubbles up from the bottom of the iterator
// tree, and we need to respect that.
func (it *HasA) NextResult() bool {
	return it.primaryIt.NextResult() || it.GetCheckResult()
}
// Next gets the next result from this iterator. This is simpler than Check: we
// take the next triple from the subiterator, pull our direction out of it, and
// return the id for that name. Any result iterator left over from a previous
// Check() is closed and replaced by a Null iterator.
func (it *HasA) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	if stale := it.resultIt; stale != nil {
		stale.Close()
	}
	it.resultIt = &Null{}

	tID, ok := it.primaryIt.Next()
	if !ok {
		return NextLogOut(it, 0, false)
	}
	val := it.ts.GetIdFor(it.ts.GetTriple(tID).Get(it.dir))
	it.Last = val
	return NextLogOut(it, val, true)
}
// GetStats returns the statistics on the HasA iterator. Next cost is easy:
// it's an extra call or so on top of the subiterator Next cost. CheckCost
// involves going to the graph.TripleStore, iterating out values, and hoping
// one sticks -- potentially expensive, depending on fanout. Size is at worst
// the size of the subiterator, but could be much smaller in totality if there
// are many repeated values.
func (it *HasA) GetStats() *graph.IteratorStats {
	// TODO(barakmich): These should really come from the triplestore itself
	// and be optimized.
	const (
		faninFactor    int64 = 1
		fanoutFactor   int64 = 30
		nextConstant   int64 = 2
		tripleConstant int64 = 1
	)
	sub := it.primaryIt.GetStats()
	stats := new(graph.IteratorStats)
	stats.NextCost = tripleConstant + sub.NextCost
	stats.CheckCost = (fanoutFactor * nextConstant) * sub.CheckCost
	stats.Size = faninFactor * sub.Size
	return stats
}
// Close closes the result iterator (if any) and then the subiterator.
func (it *HasA) Close() {
	if res := it.resultIt; res != nil {
		res.Close()
	}
	it.primaryIt.Close()
}
// Type identifies this iterator as a "hasa".
func (it *HasA) Type() string {
	return "hasa"
}

223
graph/iterator/iterator.go Normal file
View file

@ -0,0 +1,223 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Define the general iterator interface, as well as the Base which all
// iterators can "inherit" from to get default iterator functionality.
import (
"fmt"
"strings"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
)
// iterator_n is the uid handed to the next iterator initialised by BaseInit.
var iterator_n int
// The Base iterator is the iterator other iterators inherit from to get some
// default functionality.
type Base struct {
	// Last is the most recent result; it is what LastResult returns.
	Last graph.TSVal
	// tags are the tag names added through AddTag.
	tags []string
	// fixedTags maps tag names to fixed values added through AddFixedTag.
	fixedTags map[string]graph.TSVal
	// nextable is reported by Nextable; BaseInit sets it to true.
	nextable bool
	// uid is assigned by BaseInit and reported by GetUid.
	uid int
}
// BaseInit is called by the embedding iterators' constructors. It marks the
// iterator as nextable (your basic iterator is) and assigns it the current
// uid counter value. The counter only advances when glog verbosity is at
// least 2.
func BaseInit(it *Base) {
	it.nextable, it.uid = true, iterator_n
	if glog.V(2) {
		iterator_n++
	}
}
// GetUid returns the uid assigned to this iterator by BaseInit.
func (it *Base) GetUid() int { return it.uid }
// AddTag appends tag to the iterator's tag list. Most iterators don't need to
// override this.
func (it *Base) AddTag(tag string) {
	// append allocates as needed, so a nil tag list needs no special casing.
	it.tags = append(it.tags, tag)
}
// AddFixedTag associates tag with a fixed value, creating the fixed-tag map on
// first use. A later call with the same tag overwrites the earlier value.
func (it *Base) AddFixedTag(tag string, value graph.TSVal) {
	if it.fixedTags != nil {
		it.fixedTags[tag] = value
		return
	}
	it.fixedTags = map[string]graph.TSVal{tag: value}
}
// Tags returns the tag names added with AddTag (nil if none were added).
func (it *Base) Tags() []string { return it.tags }
// FixedTags returns the tag-to-value map built by AddFixedTag (nil if empty).
func (it *Base) FixedTags() map[string]graph.TSVal { return it.fixedTags }
// CopyTagsFrom adds every tag and every fixed tag of other_it to this
// iterator, in addition to whatever tags it already carries.
func (it *Base) CopyTagsFrom(other_it graph.Iterator) {
	srcTags := other_it.Tags()
	for i := 0; i < len(srcTags); i++ {
		it.AddTag(srcTags[i])
	}
	for name, fixed := range other_it.FixedTags() {
		it.AddFixedTag(name, fixed)
	}
}
// DebugString returns a placeholder debug string, "(base)", indented by indent
// spaces. Most iterators override this.
func (it *Base) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	return fmt.Sprintf("%s(base)", pad)
}
// Check always reports false: a base iterator contains nothing.
func (it *Base) Check(v graph.TSVal) bool { return false }
// GetStats returns deliberately high costs and size. Base iterators should
// never appear in a tree; if they do, select against them.
func (it *Base) GetStats() *graph.IteratorStats {
	const penalty = 100000
	return &graph.IteratorStats{
		CheckCost: penalty,
		NextCost:  penalty,
		Size:      penalty,
	}
}
// GetResultTree returns a ResultTree containing only the last result.
//
// DEPRECATED.
func (it *Base) GetResultTree() *graph.ResultTree {
	return graph.NewResultTree(it.LastResult())
}
// Next never yields a value: a base iterator contains nothing.
func (it *Base) Next() (graph.TSVal, bool) { return nil, false }
// NextResult always reports false; the base iterator has no further results.
func (it *Base) NextResult() bool { return false }
// LastResult returns the value stored in Last.
func (it *Base) LastResult() graph.TSVal { return it.Last }
// Size returns 0 and true: the base iterator is empty, and it knows it.
func (it *Base) Size() (int64, bool) {
	const empty int64 = 0
	return empty, true
}
// GetSubIterators returns nil. Only iterators that actually have subiterators
// need to override this.
func (it *Base) GetSubIterators() []graph.Iterator { return nil }
// Nextable reports the nextable flag of this iterator.
func (it *Base) Nextable() bool {
	return it.nextable
}
// TagResults fills the map with this iterator's tags: every regular tag maps
// to the last result, and every fixed tag maps to its fixed value. Fixed tags
// are written second, so they win when a name appears in both sets. The
// default functionality works well for most iterators.
func (it *Base) TagResults(out_map *map[string]graph.TSVal) {
	last := it.LastResult()
	for _, name := range it.Tags() {
		(*out_map)[name] = last
	}
	for name, fixed := range it.FixedTags() {
		(*out_map)[name] = fixed
	}
}
// Nothing to clean up.
// func (it *Base) Close() {}

// Close does nothing; the Null iterator's Close has an empty body.
func (it *Null) Close() {
}
// Reset does nothing for the base iterator.
func (it *Base) Reset() {
}
// Here we define the simplest base iterator -- the Null iterator. It contains nothing.
// It is the empty set. Often times, queries that contain one of these match nothing,
// so it's important to give it a special iterator.
type Null struct {
	// Base supplies the default (empty) Next, Check and NextResult behavior.
	Base
}
// NewNull returns a zero-valued Null iterator. BaseInit is not called on it.
func NewNull() *Null {
	return new(Null)
}
// Clone returns a fresh Null iterator; tags are not carried over.
func (it *Null) Clone() graph.Iterator {
	return NewNull()
}
// Type identifies this iterator as "null".
func (it *Null) Type() string {
	return "null"
}
// Optimize returns the iterator itself and false. A good iterator will close
// itself when it returns true; Null has nothing it needs to do.
func (it *Null) Optimize() (graph.Iterator, bool) {
	return it, false
}
// DebugString returns "(null)" preceded by indent spaces.
func (it *Null) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	return pad + "(null)"
}
// GetStats returns zero-valued stats. A null iterator costs nothing. Use it!
func (it *Null) GetStats() *graph.IteratorStats {
	return new(graph.IteratorStats)
}
// Utility logging functions for when an iterator gets called Next upon, or Check upon, as
// well as what they return. Highly useful for tracing the execution path of a query.

// CheckLogIn logs, at verbosity 4, that Check was called on it with val.
func CheckLogIn(it graph.Iterator, val graph.TSVal) {
	if !glog.V(4) {
		return
	}
	glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type()), it.GetUid(), val)
}
// CheckLogOut logs, at verbosity 4, whether the Check of val on it succeeded,
// and returns good unchanged so it can wrap a return value.
func CheckLogOut(it graph.Iterator, val graph.TSVal, good bool) bool {
	if !glog.V(4) {
		return good
	}
	switch good {
	case true:
		glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type()), it.GetUid(), val)
	default:
		glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type()), it.GetUid(), val)
	}
	return good
}
// NextLogIn logs, at verbosity 4, that Next was called on it.
func NextLogIn(it graph.Iterator) {
	if !glog.V(4) {
		return
	}
	glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type()), it.GetUid())
}
// NextLogOut logs, at verbosity 4, the outcome of a Next call on it, and
// returns val and ok unchanged so it can wrap a return statement.
func NextLogOut(it graph.Iterator, val graph.TSVal, ok bool) (graph.TSVal, bool) {
	if !glog.V(4) {
		return val, ok
	}
	switch ok {
	case true:
		glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type()), it.GetUid(), val)
	default:
		glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type()), it.GetUid())
	}
	return val, ok
}

View file

@ -0,0 +1,183 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines one of the base iterators, the LinksTo iterator. A LinksTo takes a
// subiterator of nodes, and contains an iteration of links which "link to"
// those nodes in a given direction.
//
// Next()ing a LinksTo is straightforward -- iterate through all links to
// things in the subiterator, and then advance the subiterator, and do it again.
// LinksTo is therefore sensitive to growing with a fanout. (A small-sized
// subiterator could cause LinksTo to be large).
//
// Check()ing a LinksTo means, given a link, take the direction we care about
// and check if it's in our subiterator. Checking is therefore fairly cheap, and
// similar to checking the subiterator alone.
//
// Can be seen as the dual of the HasA iterator.
import (
"fmt"
"strings"
"github.com/google/cayley/graph"
)
// A LinksTo has a reference back to the graph.TripleStore (to create the iterators
// for each node) the subiterator, and the direction the iterator comes from.
// `nextIt` is the temporary iterator held per result in `primaryIt`.
type LinksTo struct {
	Base
	// ts is the store asked for a triple iterator per node, and for a
	// replacement iterator during Optimize.
	ts graph.TripleStore
	// primaryIt is the subiterator of nodes.
	primaryIt graph.Iterator
	// dir is the direction in which links must point at those nodes.
	dir graph.Direction
	// nextIt iterates the links for the current node; it starts as a Null iterator.
	nextIt graph.Iterator
}
// NewLinksTo builds a LinksTo iterator around the node subiterator it and the
// direction d, using ts to fetch links. The per-node link iterator starts out
// as a Null iterator.
func NewLinksTo(ts graph.TripleStore, it graph.Iterator, d graph.Direction) *LinksTo {
	lto := &LinksTo{
		ts:        ts,
		primaryIt: it,
		dir:       d,
		nextIt:    &Null{},
	}
	BaseInit(&lto.Base)
	return lto
}
// Reset resets the node subiterator, closes the current per-node link iterator
// (if any) and replaces it with a Null iterator.
func (it *LinksTo) Reset() {
	it.primaryIt.Reset()
	if cur := it.nextIt; cur != nil {
		cur.Close()
	}
	it.nextIt = &Null{}
}
// Clone returns a new LinksTo over a clone of the node subiterator, with the
// same store and direction, carrying a copy of this iterator's tags.
func (it *LinksTo) Clone() graph.Iterator {
	dup := NewLinksTo(it.ts, it.primaryIt.Clone(), it.dir)
	dup.CopyTagsFrom(it)
	return dup
}
// Direction returns the direction under consideration.
func (it *LinksTo) Direction() graph.Direction {
	return it.dir
}
// TagResults records this iterator's own tags in out, then lets the node
// subiterator add its tags to the same map.
func (it *LinksTo) TagResults(out *map[string]graph.TSVal) {
	it.Base.TagResults(out)
	sub := it.primaryIt
	sub.TagResults(out)
}
// GetResultTree returns a ResultTree rooted at the last result, with the node
// subiterator's tree attached as a subtree.
//
// DEPRECATED.
func (it *LinksTo) GetResultTree() *graph.ResultTree {
	root := graph.NewResultTree(it.LastResult())
	root.AddSubtree(it.primaryIt.GetResultTree())
	return root
}
// DebugString renders this iterator (type, uid and direction) followed by the
// subiterator's debug string, indented four spaces deeper.
func (it *LinksTo) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	child := it.primaryIt.DebugString(indent + 4)
	return fmt.Sprintf("%s(%s %d direction:%s\n%s)", pad, it.Type(), it.GetUid(), it.dir, child)
}
// Check reports whether the link val is valid for this LinksTo: the node in
// our direction of the link must pass the subiterator's Check. On success val
// becomes the last result.
func (it *LinksTo) Check(val graph.TSVal) bool {
	CheckLogIn(it, val)
	node := it.ts.GetTripleDirection(val, it.dir)
	ok := it.primaryIt.Check(node)
	if ok {
		it.Last = val
	}
	return CheckLogOut(it, val, ok)
}
// GetSubIterators returns a one-element slice holding the node subiterator.
func (it *LinksTo) GetSubIterators() []graph.Iterator {
	subs := make([]graph.Iterator, 0, 1)
	return append(subs, it.primaryIt)
}
// Optimize optimizes the LinksTo, replacing it if it can be. The subiterator
// is optimized first; if it turns into a Null iterator, the per-node iterator
// is closed and the Null iterator is returned instead of the LinksTo.
// Otherwise the graph.TripleStore is asked whether it can replace us. Often
// times, this is a great optimization opportunity (there's a fixed iterator
// underneath us, for example). If it can, we close ourselves and return the
// replacement.
func (it *LinksTo) Optimize() (graph.Iterator, bool) {
	if optimized, replaced := it.primaryIt.Optimize(); replaced {
		it.primaryIt = optimized
		if optimized.Type() == "null" {
			it.nextIt.Close()
			return optimized, true
		}
	}
	if replacement, ok := it.ts.OptimizeIterator(it); ok {
		it.Close()
		return replacement, true
	}
	return it, false
}
// Next returns the next link from the current per-node iterator. When that
// iterator runs dry, it advances the node subiterator, opens the link iterator
// for the new node and tries again; it is done once the node subiterator is
// exhausted.
func (it *LinksTo) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	if link, found := it.nextIt.Next(); found {
		it.Last = link
		return NextLogOut(it, link, found)
	}
	// The per-node iterator is empty, get another one.
	node, more := it.primaryIt.Next()
	if !more {
		// We're out of nodes in our subiterator, so we're done as well.
		return NextLogOut(it, 0, false)
	}
	it.nextIt.Close()
	it.nextIt = it.ts.GetTripleIterator(it.dir, node)
	// Recurse -- return the first in the next set.
	return it.Next()
}
// Close closes the per-node link iterator and then the node subiterator.
func (it *LinksTo) Close() {
	for _, sub := range [...]graph.Iterator{it.nextIt, it.primaryIt} {
		sub.Close()
	}
}
// NextResult defers to the node subiterator: we won't ever have a new result
// ourselves, but our subiterator might.
func (it *LinksTo) NextResult() bool {
	more := it.primaryIt.NextResult()
	return more
}
// Type identifies this iterator as a "linksto".
func (it *LinksTo) Type() string {
	return "linksto"
}
// GetStats returns a guess as to how big or costly it is to next the iterator,
// derived from the subiterator's stats with constant adjustments.
func (it *LinksTo) GetStats() *graph.IteratorStats {
	// TODO(barakmich): These should really come from the triplestore itself
	const (
		fanoutFactor  int64 = 20
		checkConstant int64 = 1
		nextConstant  int64 = 2
	)
	sub := it.primaryIt.GetStats()
	stats := new(graph.IteratorStats)
	stats.NextCost = nextConstant + sub.NextCost
	stats.CheckCost = checkConstant + sub.CheckCost
	stats.Size = fanoutFactor * sub.Size
	return stats
}

View file

@ -0,0 +1,39 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"testing"
"github.com/google/cayley/graph"
)
// TestLinksTo checks that Next() on a LinksTo over a single fixed node yields
// the triple index that the mocked store returns for that node.
func TestLinksTo(t *testing.T) {
	store := new(TestTripleStore)

	// The mocked store links node 1 ("cool") to triple index 2.
	triples := newFixed()
	triples.AddValue(2)
	store.On("GetIdFor", "cool").Return(1)
	store.On("GetTripleIterator", graph.Object, 1).Return(triples)

	nodes := newFixed()
	nodes.AddValue(store.GetIdFor("cool"))

	lto := NewLinksTo(store, nodes, graph.Object)
	got, found := lto.Next()
	if !found {
		t.Error("At least one triple matches the fixed object")
	}
	if got != 2 {
		t.Errorf("Triple index 2, such as %s, should match %s", store.GetTriple(2), store.GetTriple(got))
	}
}

View file

@ -0,0 +1,60 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// A quickly mocked version of the TripleStore interface, for use in tests.
// Could make better use of Mock.Called, but will be filled in as needed.
import (
"github.com/stretchrcom/testify/mock"
"github.com/google/cayley/graph"
)
// TestTripleStore is a mock triple store for tests. Methods that go through
// mock.Mock return whatever the test registered; the rest are fixed stubs.
type TestTripleStore struct {
	mock.Mock
}
// GetIdFor returns the first value the mock has registered for a call with s.
func (ts *TestTripleStore) GetIdFor(s string) graph.TSVal {
	return ts.Mock.Called(s).Get(0)
}
// AddTriple is a stub that ignores its argument.
func (ts *TestTripleStore) AddTriple(_ *graph.Triple) {
}
// AddTripleSet is a stub that ignores its argument.
func (ts *TestTripleStore) AddTripleSet(_ []*graph.Triple) {
}
// GetTriple returns a new zero-valued triple regardless of its argument.
func (ts *TestTripleStore) GetTriple(_ graph.TSVal) *graph.Triple {
	return new(graph.Triple)
}
// GetTripleIterator returns the iterator the mock has registered for a call
// with direction d and value i.
func (ts *TestTripleStore) GetTripleIterator(d graph.Direction, i graph.TSVal) graph.Iterator {
	registered := ts.Mock.Called(d, i).Get(0)
	return registered.(graph.Iterator)
}
// GetNodesAllIterator is a stub that returns a Null iterator.
func (ts *TestTripleStore) GetNodesAllIterator() graph.Iterator {
	return &Null{}
}
// GetTriplesAllIterator is a stub that returns a Null iterator.
func (ts *TestTripleStore) GetTriplesAllIterator() graph.Iterator {
	return &Null{}
}
// GetIteratorByString is a stub that ignores its arguments and returns a Null
// iterator.
func (ts *TestTripleStore) GetIteratorByString(_, _, _ string) graph.Iterator {
	var none graph.Iterator = &Null{}
	return none
}
// GetNameFor returns the string the mock has registered for a call with v.
func (ts *TestTripleStore) GetNameFor(v graph.TSVal) string {
	registered := ts.Mock.Called(v).Get(0)
	return registered.(string)
}
// Size is a stub that always reports zero.
func (ts *TestTripleStore) Size() int64 {
	return 0
}
// DebugPrint is a stub that prints nothing.
func (ts *TestTripleStore) DebugPrint() {
}
// OptimizeIterator is a stub that never offers a replacement: it returns a
// Null iterator together with false.
func (ts *TestTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
	var none graph.Iterator = &Null{}
	return none, false
}
// FixedIterator returns a new fixed iterator that compares values with
// BasicEquality.
func (ts *TestTripleStore) FixedIterator() graph.FixedIterator {
	fixed := NewFixedIteratorWithCompare(BasicEquality)
	return fixed
}
// Close is a stub with nothing to release.
func (ts *TestTripleStore) Close() {
}
// GetTripleDirection is a stub that ignores its arguments and returns 0.
func (ts *TestTripleStore) GetTripleDirection(_ graph.TSVal, _ graph.Direction) graph.TSVal {
	return 0
}
// RemoveTriple is a stub that ignores its argument.
func (ts *TestTripleStore) RemoveTriple(t *graph.Triple) {
}

View file

@ -0,0 +1,137 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// "Optional" is kind of odd. It's not an iterator in the strictest sense, but
// it's easier to implement as an iterator.
//
// Consider what it means. It means that we have a subconstraint which we do
// not want to constrain the query -- we just want it to return the matching
// subgraph if one matches at all. By analogy to regular expressions, it is the
// '?' operator.
//
// If it were a proper iterator of its own (and indeed, a reasonable refactor
// of this iterator would be to make it such) it would contain an all iterator
// -- all things in the graph. It matches everything (as does the regex "(a)?")
import (
"fmt"
"strings"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
)
// An optional iterator has the subconstraint iterator we wish to be optional
// and whether the last check we received was true or false.
type Optional struct {
	Base
	// subIt is the optional subconstraint.
	subIt graph.Iterator
	// lastCheck records whether subIt accepted the value in the last Check();
	// it gates NextResult and TagResults.
	lastCheck bool
}
// NewOptional creates a new optional iterator around it. Unlike other
// iterators, it is marked as not nextable.
func NewOptional(it graph.Iterator) *Optional {
	o := &Optional{subIt: it}
	BaseInit(&o.Base)
	// BaseInit marks every iterator nextable; an Optional is not.
	o.nextable = false
	return o
}
// Reset resets the subiterator and forgets the outcome of the last Check().
func (it *Optional) Reset() {
	it.subIt.Reset()
	it.lastCheck = false
}
// Close closes the subiterator.
func (it *Optional) Close() {
	sub := it.subIt
	sub.Close()
}
// Clone returns a new Optional around a clone of the subiterator, carrying a
// copy of this iterator's tags.
func (it *Optional) Clone() graph.Iterator {
	dup := NewOptional(it.subIt.Clone())
	dup.CopyTagsFrom(it)
	return dup
}
// Next is unsupported on an Optional: it logs an error and returns an empty
// result. (A reasonable alternative would be to Next() an all iterator.)
func (it *Optional) Next() (graph.TSVal, bool) {
	glog.Errorln("Nexting an un-nextable iterator")
	var none graph.TSVal
	return none, false
}
// NextResult reports a next result only if (a) the last Check() matched in the
// subiterator, and (b) the optional subbranch has another subresult.
func (it *Optional) NextResult() bool {
	return it.lastCheck && it.subIt.NextResult()
}
// Check is the real hack of this iterator. It always returns true, regardless
// of whether the subiterator matched, but remembers the subiterator's verdict
// for results purposes. val becomes the last result either way.
func (it *Optional) Check(val graph.TSVal) bool {
	it.lastCheck = it.subIt.Check(val)
	it.Last = val
	return true
}
// TagResults lets the subiterator tag its results only if the last Check()
// matched; after a failed check the subiterator contributes nothing to the
// result set.
func (it *Optional) TagResults(out *map[string]graph.TSVal) {
	if it.lastCheck {
		it.subIt.TagResults(out)
	}
}
// Type identifies this iterator as "optional".
func (it *Optional) Type() string {
	return "optional"
}
// DebugString renders the optional (type and tags) followed by its
// subiterator's debug string, indented four spaces deeper.
func (it *Optional) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	child := it.subIt.DebugString(indent + 4)
	return fmt.Sprintf("%s(%s tags:%s\n%s)", pad, it.Type(), it.Tags(), child)
}
// Optimize optimizes the subiterator. If that produced a replacement, the old
// subiterator is closed and swapped out. There's nothing to optimize for the
// optional itself, so it always returns itself and false.
func (it *Optional) Optimize() (graph.Iterator, bool) {
	if optimized, replaced := it.subIt.Optimize(); replaced {
		it.subIt.Close()
		it.subIt = optimized
	}
	return it, false
}
// GetStats reports the subiterator's check cost and size. The next cost is set
// to a huge value, because an optional can't be nexted.
func (it *Optional) GetStats() *graph.IteratorStats {
	sub := it.subIt.GetStats()
	stats := new(graph.IteratorStats)
	stats.CheckCost = sub.CheckCost
	stats.NextCost = int64(1 << 62)
	stats.Size = sub.Size
	return stats
}

View file

@ -0,0 +1,284 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines the or and short-circuiting or iterator. Or is the union operator for its subiterators.
// Short-circuiting-or is a little different. It will return values from the first graph.iterator that returns
// values at all, and then stops.
//
// Never reorders the iterators from the order they arrive. It is either the union or the first one.
// May return the same value twice -- once for each branch.
import (
"fmt"
"strings"
"github.com/google/cayley/graph"
)
// Or is the union iterator over its subiterators, optionally short-circuiting.
type Or struct {
	Base
	// isShortCircuiting selects short-circuit behavior over plain union.
	isShortCircuiting bool
	// internalIterators holds the subiterators in the order they were added.
	internalIterators []graph.Iterator
	// itCount is incremented once per AddSubIterator call.
	itCount int
	// currentIterator indexes the subiterator that produced the latest result;
	// it is -1 after construction and after Reset.
	currentIterator int
}
// NewOr returns an empty, non-short-circuiting Or iterator with room
// preallocated for 20 subiterators.
func NewOr() *Or {
	or := &Or{
		internalIterators: make([]graph.Iterator, 0, 20),
		isShortCircuiting: false,
		currentIterator:   -1,
	}
	BaseInit(&or.Base)
	return or
}
// NewShortCircuitOr returns an empty, short-circuiting Or iterator with room
// preallocated for 20 subiterators.
func NewShortCircuitOr() *Or {
	or := &Or{
		internalIterators: make([]graph.Iterator, 0, 20),
		isShortCircuiting: true,
		currentIterator:   -1,
	}
	BaseInit(&or.Base)
	return or
}
// Reset resets every subiterator and marks that no subiterator is current.
func (it *Or) Reset() {
	for i := range it.internalIterators {
		it.internalIterators[i].Reset()
	}
	it.currentIterator = -1
}
// Clone returns a new Or of the same kind (short-circuiting or not) over
// clones of all subiterators, in order, carrying a copy of this iterator's
// tags.
func (it *Or) Clone() graph.Iterator {
	var or *Or
	if it.isShortCircuiting {
		or = NewShortCircuitOr()
	} else {
		or = NewOr()
	}
	for _, sub := range it.internalIterators {
		or.AddSubIterator(sub.Clone())
	}
	// Copy the tags onto the clone. Copying onto the receiver (as was done
	// before) duplicated the source's own tags and left the clone untagged.
	or.CopyTagsFrom(it)
	return or
}
// GetSubIterators returns the slice of subiterators, in order. The returned
// slice must not be modified.
func (it *Or) GetSubIterators() []graph.Iterator {
	subs := it.internalIterators
	return subs
}
// TagResults overrides the Base TagResults, as it needs to add its own results
// and recurse down the subiterator that produced the current result.
//
// When no subiterator is current (before the first Next()/Check(), after
// Reset(), or once Next() has run past the last subiterator) only the Or's own
// tags are recorded, instead of panicking on an out-of-range index.
func (it *Or) TagResults(out *map[string]graph.TSVal) {
	it.Base.TagResults(out)
	if it.currentIterator < 0 || it.currentIterator >= len(it.internalIterators) {
		return
	}
	it.internalIterators[it.currentIterator].TagResults(out)
}
// GetResultTree returns the ResultTree for this iterator: a root holding the
// last result, with one subtree per subiterator, in order.
//
// DEPRECATED.
func (it *Or) GetResultTree() *graph.ResultTree {
	root := graph.NewResultTree(it.LastResult())
	for i := range it.internalIterators {
		root.AddSubtree(it.internalIterators[i].GetResultTree())
	}
	return root
}
// DebugString renders this iterator: its type, its tags, and a numbered
// listing of the subiterators' debug strings.
func (it *Or) DebugString(indent int) string {
	inner := strings.Repeat(" ", indent+2)

	// One numbered entry per subiterator, each indented below the "its:" line.
	entries := make([]string, 0, len(it.internalIterators))
	for i, sub := range it.internalIterators {
		entries = append(entries, inner+fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4)))
	}

	tagList := ""
	for _, tag := range it.Tags() {
		tagList += tag + ";"
	}

	return fmt.Sprintf("%s(%s\n%stags:%s\n%sits:\n%s)",
		strings.Repeat(" ", indent),
		it.Type(),
		inner,
		tagList,
		inner,
		strings.Join(entries, ""))
}
// AddSubIterator appends sub to this Or's subiterators and bumps the
// subiterator count. Order matters.
func (it *Or) AddSubIterator(sub graph.Iterator) {
	it.itCount++
	it.internalIterators = append(it.internalIterators, sub)
}
// Next returns the next value from the Or iterator. Because the Or is the
// union of its subiterators, it must produce from all subiterators in order --
// unless it's short-circuiting, in which case only the first subiterator that
// returns anything is drained.
//
// Once every eligible subiterator is exhausted it returns (nil, false), and
// keeps doing so on repeated calls. An Or with no subiterators is simply
// empty. Both cases used to panic with an index out of range.
func (it *Or) Next() (graph.TSVal, bool) {
	NextLogIn(it)
	// firstTime is true only for the call that starts the iteration. A
	// short-circuiting Or may skip over empty subiterators during that call
	// alone; afterwards it sticks with the subiterator it settled on.
	firstTime := false
	if it.currentIterator == -1 {
		it.currentIterator = 0
		firstTime = true
	}
	for it.currentIterator < len(it.internalIterators) {
		curr, exists := it.internalIterators[it.currentIterator].Next()
		if exists {
			it.Last = curr
			return NextLogOut(it, curr, true)
		}
		if it.isShortCircuiting && !firstTime {
			break
		}
		it.currentIterator++
	}
	return NextLogOut(it, nil, false)
}
// checkSubIts checks val against the subiterators, in order. The first one
// that accepts it becomes the current subiterator and ends the search.
func (it *Or) checkSubIts(val graph.TSVal) bool {
	for i, sub := range it.internalIterators {
		if sub.Check(val) {
			it.currentIterator = i
			return true
		}
	}
	return false
}
// Check tests val against the subiterators, in order. If any accepts it, val
// becomes the last result and Check returns true.
func (it *Or) Check(val graph.TSVal) bool {
	CheckLogIn(it, val)
	if it.checkSubIts(val) {
		it.Last = val
		return CheckLogOut(it, val, true)
	}
	return CheckLogOut(it, val, false)
}
// Size returns the approximate size of the Or iterator. Because we're dealing
// with a union, the largest we can be is the sum of all the subiterator sizes,
// or in the case of short-circuiting, the largest of them. The boolean is true
// only if every subiterator returned true for its own size.
func (it *Or) Size() (int64, bool) {
	var total int64
	allOK := true
	for _, sub := range it.internalIterators {
		n, ok := sub.Size()
		switch {
		case !it.isShortCircuiting:
			total += n
		case total < n:
			total = n
		}
		allOK = ok && allOK
	}
	return total, allOK
}
// NextResult passes the call to the current subiterator. An Or has no
// NextResult of its own -- that is, there are no other values which satisfy
// our previous result that are not the result itself -- but the subiterator
// that produced the result might. In the case of short-circuiting, this only
// allows new results from the currently checked subiterator.
//
// It returns false when no subiterator is current, including after Next() has
// run past the last subiterator, which previously indexed out of range.
func (it *Or) NextResult() bool {
	if it.currentIterator < 0 || it.currentIterator >= len(it.internalIterators) {
		return false
	}
	return it.internalIterators[it.currentIterator].NextResult()
}
// cleanUp performs or-specific cleanup, of which there currently is none.
func (it *Or) cleanUp() {
}
// Close closes this iterator and, by extension, its subiterators. Close should
// be idempotent, and it follows that if the subiterators follow this contract,
// the Or follows the contract.
func (it *Or) Close() {
	it.cleanUp()
	for i := range it.internalIterators {
		it.internalIterators[i].Close()
	}
}
// Optimize optimizes the subiterators and returns a new Or of the same kind
// built from the results, in order, carrying this iterator's tags. It always
// reports that a replacement was made.
func (it *Or) Optimize() (graph.Iterator, bool) {
	previous := it.GetSubIterators()
	optimized := optimizeSubIterators(previous)
	// Close the replaced iterators (they ought to close themselves, but Close()
	// is idempotent, so this just protects against any machinations).
	closeIteratorList(previous, nil)

	replacement := NewOr()
	replacement.isShortCircuiting = it.isShortCircuiting
	// Add the subiterators in order.
	for i := range optimized {
		replacement.AddSubIterator(optimized[i])
	}
	// Move the tags hanging on us (like any good replacement).
	replacement.CopyTagsFrom(it)

	// And close ourselves but not our subiterators -- some may still be alive in
	// the new Or (they were unchanged upon calling Optimize() on them, at the
	// start).
	it.cleanUp()
	return replacement, true
}
// GetStats sums the subiterators' next and check costs. The size is the sum of
// the subiterator sizes, or the largest of them when short-circuiting.
func (it *Or) GetStats() *graph.IteratorStats {
	var checkCost, nextCost, size int64
	for _, sub := range it.internalIterators {
		subStats := sub.GetStats()
		nextCost += subStats.NextCost
		checkCost += subStats.CheckCost
		switch {
		case !it.isShortCircuiting:
			size += subStats.Size
		case size < subStats.Size:
			size = subStats.Size
		}
	}
	stats := new(graph.IteratorStats)
	stats.CheckCost = checkCost
	stats.NextCost = nextCost
	stats.Size = size
	return stats
}
// Type identifies this iterator as an "or".
func (it *Or) Type() string {
	return "or"
}

View file

@ -0,0 +1,145 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
"github.com/google/cayley/graph"
)
// extractNumbersFromIterator drains it with Next() and returns the values in
// order, each asserted to be an int. It returns nil if it yields nothing.
func extractNumbersFromIterator(it graph.Iterator) []int {
	var numbers []int
	for val, ok := it.Next(); ok; val, ok = it.Next() {
		numbers = append(numbers, val.(int))
	}
	return numbers
}
// TestOrIteratorBasics exercises size estimation, iteration (including after
// Reset and Optimize) and Check on a plain Or of two fixed iterators.
func TestOrIteratorBasics(t *testing.T) {
	var orIt *Or
	Convey("Given an Or Iterator of two fixed iterators", t, func() {
		orIt = NewOr()

		first := newFixed()
		for _, v := range []int{1, 2, 3} {
			first.AddValue(v)
		}
		second := newFixed()
		for _, v := range []int{3, 9, 20, 21} {
			second.AddValue(v)
		}
		orIt.AddSubIterator(first)
		orIt.AddSubIterator(second)

		Convey("It should guess its size.", func() {
			v, _ := orIt.Size()
			So(v, ShouldEqual, 7)
		})

		Convey("It should extract all the numbers, potentially twice.", func() {
			// 3 is in both subiterators, so it shows up once per branch.
			want := []int{1, 2, 3, 3, 9, 20, 21}
			So(extractNumbersFromIterator(orIt), ShouldResemble, want)
			orIt.Reset()
			So(extractNumbersFromIterator(orIt), ShouldResemble, want)
			// Optimization works
			optimized, _ := orIt.Optimize()
			So(extractNumbersFromIterator(optimized), ShouldResemble, want)
		})

		Convey("It should check that numbers in either iterator exist.", func() {
			for _, present := range []int{2, 3, 21} {
				So(orIt.Check(present), ShouldEqual, true)
			}
		})

		Convey("It should check that numbers not in either iterator are false.", func() {
			for _, absent := range []int{22, 5, 0} {
				So(orIt.Check(absent), ShouldEqual, false)
			}
		})
	})
}
// TestShortCircuitingOrBasics exercises an Or built with NewShortCircuitOr.
// The expectations below show that it yields the values of the first
// sub-iterator that produces any, rather than the values of all of them.
func TestShortCircuitingOrBasics(t *testing.T) {
var orIt *Or
Convey("Given a short-circuiting Or of two fixed iterators", t, func() {
orIt = NewShortCircuitOr()
fixed1 := newFixed()
fixed1.AddValue(1)
fixed1.AddValue(2)
fixed1.AddValue(3)
fixed2 := newFixed()
fixed2.AddValue(3)
fixed2.AddValue(9)
fixed2.AddValue(20)
fixed2.AddValue(21)
// Unlike TestOrIteratorBasics, the sub-iterators are attached inside each
// nested Convey, because the last case needs a different first iterator.
// NOTE(review): presumably this relies on goconvey re-running the enclosing
// block for every nested Convey, so each case starts from a fresh orIt -- confirm.
Convey("It should guess its size.", func() {
orIt.AddSubIterator(fixed1)
orIt.AddSubIterator(fixed2)
// 4 is the larger of the two sub-iterator sizes (3 and 4), not their sum.
v, _ := orIt.Size()
So(v, ShouldEqual, 4)
})
Convey("It should extract the first iterators' numbers.", func() {
orIt.AddSubIterator(fixed1)
orIt.AddSubIterator(fixed2)
// Only the first iterator's values are expected; the second is never reached.
allNumbers := []int{1, 2, 3}
So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers)
orIt.Reset()
So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers)
// Optimization works
newOr, _ := orIt.Optimize()
So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers)
})
Convey("It should check that numbers in either iterator exist.", func() {
orIt.AddSubIterator(fixed1)
orIt.AddSubIterator(fixed2)
// Check is expected to consult both sub-iterators: 21 is only in the second.
So(orIt.Check(2), ShouldEqual, true)
So(orIt.Check(3), ShouldEqual, true)
So(orIt.Check(21), ShouldEqual, true)
So(orIt.Check(22), ShouldEqual, false)
So(orIt.Check(5), ShouldEqual, false)
So(orIt.Check(0), ShouldEqual, false)
})
Convey("It should check that it pulls the second iterator's numbers if the first is empty.", func() {
// An empty fixed iterator stands in as the first sub-iterator here.
orIt.AddSubIterator(newFixed())
orIt.AddSubIterator(fixed2)
allNumbers := []int{3, 9, 20, 21}
So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers)
orIt.Reset()
So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers)
// Optimization works
newOr, _ := orIt.Optimize()
So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers)
})
})
}

View file

@ -0,0 +1,181 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"github.com/google/cayley/graph"
)
// Node is one vertex of the query shape, in the form placed in the output map
// under "nodes". The struct tags give its JSON field names.
type Node struct {
// Id is the number taken from queryShape.nodeId when the node was made.
Id int `json:"id"`
// Tags holds the tag names (plain and fixed) of the iterators folded into
// this node.
Tags []string `json:"tags,omitempty"`
// Values holds the names of the values drained from "fixed" iterators folded
// into this node.
Values []string `json:"values,omitempty"`
// IsLinkNode is set when a "linksto" iterator contributed to this node.
IsLinkNode bool `json:"is_link_node"`
// IsFixed is set when a "fixed" iterator contributed to this node.
IsFixed bool `json:"is_fixed"`
}
// Link is one edge of the query shape, in the form placed in the output map
// under "links". Every field is a Node Id. MakeNode builds Links with
// positional literals (source, target, pred, link node), so the field order
// must not change.
type Link struct {
Source int `json:"source"`
Target int `json:"target"`
// Pred is serialized as "type"; MakeNode always sets it to 0.
Pred int `json:"type"`
// LinkNode is the Id of the node that represents the link itself; MakeNode
// sets it to 0 for links to "and"/"or" sub-iterators.
LinkNode int `json:"link_node"`
}
// queryShape accumulates the nodes and links discovered while walking an
// iterator tree with MakeNode.
type queryShape struct {
nodes []Node
links []Link
// ts is used to turn the values of fixed iterators into names.
ts graph.TripleStore
// nodeId is the Id given to the next Node that MakeNode creates.
nodeId int
// hasaIds and hasaDirs are parallel stacks describing the enclosing "hasa"
// iterators: the Id of the node and the direction of the HasA. They are
// pushed and popped together by PushHasa and RemoveHasa.
hasaIds []int
hasaDirs []graph.Direction
}
// OutputQueryShapeForIterator describes the iterator tree rooted at it as a
// graph and stores the result in *outputMap: the node list under "nodes" and
// the link list under "links".
//
// The walk runs on a clone of it, so the caller's iterator is not advanced.
// ts is used to resolve values of fixed iterators to names. Node numbering
// starts at 1.
func OutputQueryShapeForIterator(it graph.Iterator, ts graph.TripleStore, outputMap *map[string]interface{}) {
	shape := queryShape{ts: ts, nodeId: 1}

	// The root node is appended last, after everything beneath it.
	root := shape.MakeNode(it.Clone())
	shape.AddNode(root)

	out := *outputMap
	out["nodes"] = shape.nodes
	out["links"] = shape.links
}
// AddNode appends a copy of *n to the list of nodes.
func (qs *queryShape) AddNode(n *Node) {
	node := *n
	qs.nodes = append(qs.nodes, node)
}
// AddLink appends a copy of *l to the list of links.
func (qs *queryShape) AddLink(l *Link) {
	link := *l
	qs.links = append(qs.links, link)
}
// LastHasa returns the node Id and direction most recently pushed with
// PushHasa, without removing them. It panics with an index-out-of-range error
// when nothing has been pushed.
func (qs *queryShape) LastHasa() (int, graph.Direction) {
	topID := qs.hasaIds[len(qs.hasaIds)-1]
	topDir := qs.hasaDirs[len(qs.hasaDirs)-1]
	return topID, topDir
}
// PushHasa records node Id i and direction d as the innermost enclosing hasa.
func (qs *queryShape) PushHasa(i int, d graph.Direction) {
	ids, dirs := qs.hasaIds, qs.hasaDirs
	qs.hasaIds = append(ids, i)
	qs.hasaDirs = append(dirs, d)
}
// RemoveHasa drops the entry most recently pushed with PushHasa from both
// stacks. It panics when the stacks are empty.
func (qs *queryShape) RemoveHasa() {
	lastID := len(qs.hasaIds) - 1
	lastDir := len(qs.hasaDirs) - 1
	qs.hasaIds = qs.hasaIds[:lastID]
	qs.hasaDirs = qs.hasaDirs[:lastDir]
}
// StealNode folds right into left. left gains right's values and tags and
// inherits its IsLinkNode and IsFixed flags, and every link that refers to
// right (as source, target, or link node) is re-pointed at left.
//
// right itself is not modified and is not removed from qs.nodes; callers
// simply never add it.
func (qs *queryShape) StealNode(left *Node, right *Node) {
	left.Values = append(left.Values, right.Values...)
	left.Tags = append(left.Tags, right.Tags...)
	left.IsLinkNode = left.IsLinkNode || right.IsLinkNode
	left.IsFixed = left.IsFixed || right.IsFixed

	// Rewrite the links in place. Deleting and re-appending an entry while
	// ranging over the slice shifts later entries down in the shared backing
	// array, so the entry following each rewritten link would be skipped and
	// could keep referring to right. Updating through the index visits every
	// link exactly once and also keeps the links in their original order.
	for i := range qs.links {
		link := &qs.links[i]
		if link.LinkNode == right.Id {
			link.LinkNode = left.Id
		}
		if link.Source == right.Id {
			link.Source = left.Id
		}
		if link.Target == right.Id {
			link.Target = left.Id
		}
	}
}
// MakeNode builds the Node describing iterator it, recursing into its
// sub-iterators. The returned node has not been added to qs.nodes; the caller
// decides whether to add it or merge it into another node with StealNode.
// Nodes and links for the sub-iterators are added to qs as a side effect.
//
// The new node takes the current qs.nodeId as its Id; qs.nodeId is advanced
// before each recursive call so every node gets a distinct Id.
func (qs *queryShape) MakeNode(it graph.Iterator) *Node {
n := Node{Id: qs.nodeId}
// Both the plain tags and the keys of the fixed tags end up in n.Tags.
for _, tag := range it.Tags() {
n.Tags = append(n.Tags, tag)
}
for k, _ := range it.FixedTags() {
n.Tags = append(n.Tags, k)
}
switch it.Type() {
case "and":
// Every child that is not an "or" is folded into this node; an "or" child
// stays a separate node joined to this one by a plain link.
for _, sub := range it.GetSubIterators() {
qs.nodeId++
newNode := qs.MakeNode(sub)
if sub.Type() != "or" {
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
qs.AddLink(&Link{n.Id, newNode.Id, 0, 0})
}
}
case "fixed":
n.IsFixed = true
// Drain the iterator, recording the name of each value. This consumes it;
// OutputQueryShapeForIterator starts the walk from a clone.
for {
val, more := it.Next()
if !more {
break
}
n.Values = append(n.Values, qs.ts.GetNameFor(val))
}
case "hasa":
hasa := it.(*HasA)
// Keep this node on the hasa stack while the primary iterator is walked so
// that a nested "linksto" can find it through LastHasa.
qs.PushHasa(n.Id, hasa.dir)
qs.nodeId++
newNode := qs.MakeNode(hasa.primaryIt)
qs.AddNode(newNode)
qs.RemoveHasa()
case "or":
// Mirror image of "and": nested "or" children are folded in, every other
// child becomes a separate, linked node.
for _, sub := range it.GetSubIterators() {
qs.nodeId++
newNode := qs.MakeNode(sub)
if sub.Type() == "or" {
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
qs.AddLink(&Link{n.Id, newNode.Id, 0, 0})
}
}
case "linksto":
n.IsLinkNode = true
lto := it.(*LinksTo)
qs.nodeId++
newNode := qs.MakeNode(lto.primaryIt)
// NOTE(review): LastHasa indexes the hasa stack unconditionally, so this
// presumably panics for a "linksto" that is not nested under a "hasa" --
// confirm that callers never build such a tree.
hasaID, hasaDir := qs.LastHasa()
// When the hasa and the linksto look at opposite ends of the link
// (subject/object), the primary node is a real neighbour of the hasa node:
// add it and connect the two, naming this node as the link node. The link
// is oriented so that the subject side is the source.
if (hasaDir == graph.Subject && lto.dir == graph.Object) ||
(hasaDir == graph.Object && lto.dir == graph.Subject) {
qs.AddNode(newNode)
if hasaDir == graph.Subject {
qs.AddLink(&Link{hasaID, newNode.Id, 0, n.Id})
} else {
qs.AddLink(&Link{newNode.Id, hasaID, 0, n.Id})
}
} else if lto.primaryIt.Type() == "fixed" {
// Otherwise a fixed primary iterator is folded into this link node.
qs.StealNode(&n, newNode)
} else {
qs.AddNode(newNode)
}
case "optional":
// Unsupported, for the moment
fallthrough
case "all":
// Nothing beyond the tags collected above is recorded.
}
return &n
}

View file

@ -0,0 +1,126 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
"github.com/google/cayley/graph"
)
// buildHasaWithTag builds the iterator tree
//
//	HasA(Subject, And(LinksTo(Object, fixed{target}), LinksTo(Predicate, fixed{"status"})))
//
// over ts, with tag attached to the fixed iterator that holds target.
func buildHasaWithTag(ts graph.TripleStore, tag string, target string) *HasA {
	objIt := ts.FixedIterator()
	predIt := ts.FixedIterator()
	objIt.AddValue(ts.GetIdFor(target))
	predIt.AddValue(ts.GetIdFor("status"))
	objIt.AddTag(tag)

	toObj := NewLinksTo(ts, objIt, graph.Object)
	toPred := NewLinksTo(ts, predIt, graph.Predicate)

	both := NewAnd()
	both.AddSubIterator(toObj)
	both.AddSubIterator(toPred)

	return NewHasA(ts, both, graph.Subject)
}
// TestQueryShape checks the node/link graph that OutputQueryShapeForIterator
// produces for two iterator trees built over a mocked triple store.
func TestQueryShape(t *testing.T) {
	// Named shape rather than queryShape so that it does not shadow the
	// package's queryShape type.
	var shape map[string]interface{}

	ts := new(TestTripleStore)
	ts.On("GetIdFor", "cool").Return(1)
	ts.On("GetNameFor", 1).Return("cool")
	ts.On("GetIdFor", "status").Return(2)
	ts.On("GetNameFor", 2).Return("status")
	ts.On("GetIdFor", "fun").Return(3)
	ts.On("GetNameFor", 3).Return("fun")
	ts.On("GetIdFor", "name").Return(4)
	ts.On("GetNameFor", 4).Return("name")

	Convey("Given a single linkage iterator's shape", t, func() {
		shape = make(map[string]interface{})
		hasa := buildHasaWithTag(ts, "tag", "cool")
		hasa.AddTag("top")
		OutputQueryShapeForIterator(hasa, ts, &shape)

		Convey("It should have three nodes and one link", func() {
			nodes := shape["nodes"].([]Node)
			links := shape["links"].([]Link)
			So(len(nodes), ShouldEqual, 3)
			So(len(links), ShouldEqual, 1)
		})

		Convey("These nodes should be correctly tagged", func() {
			nodes := shape["nodes"].([]Node)
			So(nodes[0].Tags, ShouldResemble, []string{"tag"})
			So(nodes[1].IsLinkNode, ShouldEqual, true)
			So(nodes[2].Tags, ShouldResemble, []string{"top"})
		})

		Convey("The link should be correctly typed", func() {
			nodes := shape["nodes"].([]Node)
			links := shape["links"].([]Link)
			So(links[0].Source, ShouldEqual, nodes[2].Id)
			So(links[0].Target, ShouldEqual, nodes[0].Id)
			So(links[0].LinkNode, ShouldEqual, nodes[1].Id)
			So(links[0].Pred, ShouldEqual, 0)
		})
	})

	Convey("Given a name-of-an-and-iterator's shape", t, func() {
		shape = make(map[string]interface{})
		hasa1 := buildHasaWithTag(ts, "tag1", "cool")
		hasa1.AddTag("hasa1")
		hasa2 := buildHasaWithTag(ts, "tag2", "fun")
		// Tag the second hasa; this used to tag hasa1 a second time.
		hasa2.AddTag("hasa2")

		andInternal := NewAnd()
		andInternal.AddSubIterator(hasa1)
		andInternal.AddSubIterator(hasa2)

		fixedPred := ts.FixedIterator()
		fixedPred.AddValue(ts.GetIdFor("name"))

		lto1 := NewLinksTo(ts, andInternal, graph.Subject)
		lto2 := NewLinksTo(ts, fixedPred, graph.Predicate)

		and := NewAnd()
		and.AddSubIterator(lto1)
		and.AddSubIterator(lto2)

		hasa := NewHasA(ts, and, graph.Object)
		OutputQueryShapeForIterator(hasa, ts, &shape)

		Convey("It should have seven nodes and three links", func() {
			nodes := shape["nodes"].([]Node)
			links := shape["links"].([]Link)
			So(len(nodes), ShouldEqual, 7)
			So(len(links), ShouldEqual, 3)
		})

		Convey("Three of the nodes are link nodes, four aren't", func() {
			nodes := shape["nodes"].([]Node)
			count := 0
			for _, node := range nodes {
				if node.IsLinkNode {
					count++
				}
			}
			So(count, ShouldEqual, 3)
		})

		// Placeholder: a nil action leaves this case unimplemented.
		Convey("These nodes should be correctly tagged", nil)
	})
}

View file

@ -0,0 +1,190 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// "Value Comparison" is a unary operator -- a filter across the values in the
// relevant subiterator.
//
// This is hugely useful for things like provenance, but value ranges in general
// come up from time to time. At *worst* we're as big as our underlying iterator.
// At best, we're the null iterator.
//
// This is ripe for backend-side optimization. If you can run a value iterator,
// from a sorted set -- some sort of value index, then go for it.
//
// In MQL terms, this is the [{"age>=": 21}] concept.
import (
"fmt"
"log"
"strconv"
"strings"
"github.com/google/cayley/graph"
)
// Operator selects which ordering comparison a Comparison iterator applies.
type Operator int
// The comparison operators understood by RunIntOp, which evaluates them as
// "a op b".
const (
// a < b
kCompareLT Operator = iota
// a <= b
kCompareLTE
// a > b
kCompareGT
// a >= b
kCompareGTE
// Why no Equals? Because that's usually an AndIterator.
)
// Comparison is an iterator that filters the values of a sub-iterator, keeping
// those whose name compares favourably against a fixed value.
type Comparison struct {
Base
// subIt supplies the candidate values.
subIt graph.Iterator
// op and val form the test: a candidate is kept when "candidate op val"
// holds. Only int and int64 values of val are compared; see doComparison.
op Operator
val interface{}
// ts turns a candidate into its name so that it can be parsed as a number.
ts graph.TripleStore
}
// NewComparison returns a Comparison that keeps the values of sub for which
// "value op val" holds, using ts to look up the name of each value.
func NewComparison(sub graph.Iterator, op Operator, val interface{}, ts graph.TripleStore) *Comparison {
	vc := &Comparison{
		subIt: sub,
		op:    op,
		val:   val,
		ts:    ts,
	}
	BaseInit(&vc.Base)
	return vc
}
// doComparison is the heart of the iterator: it reports whether val satisfies
// the configured operator and comparison value.
//
// The name of val is looked up in the triple store and parsed as a base-10
// int64. A name that does not parse fails the comparison. When the comparison
// value is neither an int nor an int64 nothing is compared and every val
// passes.
func (it *Comparison) doComparison(val graph.TSVal) bool {
	//TODO(barakmich): Implement string comparison.
	name := it.ts.GetNameFor(val)

	// Normalize the comparison value to int64.
	var bound int64
	switch typed := it.val.(type) {
	case int:
		bound = int64(typed)
	case int64:
		bound = typed
	default:
		return true
	}

	parsed, err := strconv.ParseInt(name, 10, 64)
	if err != nil {
		return false
	}
	return RunIntOp(parsed, it.op, bound)
}
// Close closes the sub-iterator.
func (it *Comparison) Close() {
	sub := it.subIt
	sub.Close()
}
// RunIntOp evaluates "a op b" for the four ordering operators. An operator
// outside that set is reported with log.Fatal, which terminates the program.
func RunIntOp(a int64, op Operator, b int64) bool {
	var holds bool
	switch op {
	case kCompareLT:
		holds = a < b
	case kCompareLTE:
		holds = a <= b
	case kCompareGT:
		holds = a > b
	case kCompareGTE:
		holds = a >= b
	default:
		log.Fatal("Unknown operator type")
	}
	return holds
}
// Reset resets the sub-iterator.
func (it *Comparison) Reset() {
	sub := it.subIt
	sub.Reset()
}
// Clone returns a new Comparison over a clone of the sub-iterator, with the
// same operator, comparison value and triple store, and with this iterator's
// tags copied onto it.
func (it *Comparison) Clone() graph.Iterator {
	sub := it.subIt.Clone()
	dup := NewComparison(sub, it.op, it.val, it.ts)
	dup.CopyTagsFrom(it)
	return dup
}
// Next advances the sub-iterator until it yields a value that passes the
// comparison. That value is stored in it.Last and returned with true. When the
// sub-iterator runs out first, Next returns (nil, false) and leaves it.Last
// untouched.
func (it *Comparison) Next() (graph.TSVal, bool) {
	for {
		candidate, more := it.subIt.Next()
		if !more {
			return nil, false
		}
		if it.doComparison(candidate) {
			it.Last = candidate
			return candidate, true
		}
	}
}
// NextResult advances the sub-iterator through its further results until one
// passes the comparison. That result is stored in it.Last and true is
// returned. It returns false, leaving it.Last untouched, once the sub-iterator
// has no more results.
func (it *Comparison) NextResult() bool {
	for it.subIt.NextResult() {
		result := it.subIt.LastResult()
		if it.doComparison(result) {
			// Record the match before returning. The assignment used to sit
			// after an infinite loop that only exited by returning, so it was
			// never executed.
			it.Last = result
			return true
		}
	}
	return false
}
// Check reports whether val both passes the comparison and is accepted by the
// sub-iterator. The sub-iterator is consulted only when the comparison passes.
func (it *Comparison) Check(val graph.TSVal) bool {
	return it.doComparison(val) && it.subIt.Check(val)
}
// TagResults writes this iterator's own tags into out and then lets the
// sub-iterator add its tags. Both steps always run; no comparison is
// evaluated here.
func (it *Comparison) TagResults(out *map[string]graph.TSVal) {
	base := &it.Base
	base.TagResults(out)
	it.subIt.TagResults(out)
}
// Type reports the identifier of this iterator kind, which is always
// "value-comparison".
func (it *Comparison) Type() string {
	const kind = "value-comparison"
	return kind
}
// DebugString renders the iterator as "(<type>" indented by indent spaces,
// followed on the next line by the sub-iterator's DebugString at indent+4 and
// a closing parenthesis.
func (it *Comparison) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	inner := it.subIt.DebugString(indent + 4)
	return fmt.Sprintf("%s(%s\n%s)", pad, it.Type(), inner)
}
// Optimize has nothing to improve locally for a value comparison. It only
// gives the sub-iterator the chance to optimize itself: when the sub-iterator
// reports a replacement, the old one is closed and the replacement installed.
// The receiver is always returned, with false, because the Comparison itself
// is never replaced.
func (it *Comparison) Optimize() (graph.Iterator, bool) {
	if optimized, replaced := it.subIt.Optimize(); replaced {
		it.subIt.Close()
		it.subIt = optimized
	}
	return it, false
}
// GetStats passes the sub-iterator's statistics through unchanged; the
// filtering done here is not accounted for.
func (it *Comparison) GetStats() *graph.IteratorStats {
	stats := it.subIt.GetStats()
	return stats
}

View file

@ -0,0 +1,128 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"testing"
"github.com/google/cayley/graph"
)
// SetupMockTripleStore returns a TestTripleStore primed so that, for every
// name/id pair in nameMap, GetIdFor(name) returns id and GetNameFor(id)
// returns name.
func SetupMockTripleStore(nameMap map[string]int) *TestTripleStore {
	store := &TestTripleStore{}
	for name, id := range nameMap {
		store.On("GetIdFor", name).Return(id)
		store.On("GetNameFor", id).Return(name)
	}
	return store
}
// SimpleValueTripleStore returns a mock triple store in which each of the ids
// 0 through 5 is named by its own decimal string.
func SimpleValueTripleStore() *TestTripleStore {
	return SetupMockTripleStore(map[string]int{
		"0": 0,
		"1": 1,
		"2": 2,
		"3": 3,
		"4": 4,
		"5": 5,
	})
}
// BuildFixedIterator returns a fixed iterator holding the ints 0 through 4,
// added in ascending order.
func BuildFixedIterator() *Fixed {
	fixed := newFixed()
	for v := 0; v < 5; v++ {
		fixed.AddValue(v)
	}
	return fixed
}
// checkIteratorContains drains it and verifies that the names of the values it
// produced (looked up through ts) are exactly the strings in expected, as a
// multiset: order is ignored but duplicates count. The first discrepancy is
// reported with t.Error.
func checkIteratorContains(ts graph.TripleStore, it graph.Iterator, expected []string, t *testing.T) {
	var actual []string
	for {
		val, ok := it.Next()
		if !ok {
			break
		}
		actual = append(actual, ts.GetNameFor(val))
	}

	// Cross names off a private copy. Removing elements from a slice that
	// shares its backing array with actual would shuffle actual as well and
	// make the "Actual:" part of the failure messages misleading.
	remainder := append([]string(nil), actual...)
	for _, want := range expected {
		found := false
		for j, got := range remainder {
			if want == got {
				remainder = append(remainder[:j], remainder[j+1:]...)
				found = true
				break
			}
		}
		if !found {
			t.Error("Couldn't find", want, "in actual output.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", remainder)
			return
		}
	}
	if len(remainder) != 0 {
		t.Error("Actual output has more than expected.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", remainder)
	}
}
// TestWorkingIntValueComparison expects "< 3" over 0..4 to keep 0, 1 and 2.
func TestWorkingIntValueComparison(t *testing.T) {
	store := SimpleValueTripleStore()
	lessThanThree := NewComparison(BuildFixedIterator(), kCompareLT, int64(3), store)
	want := []string{"0", "1", "2"}
	checkIteratorContains(store, lessThanThree, want, t)
}
// TestFailingIntValueComparison expects "< 0" over 0..4 to keep nothing.
func TestFailingIntValueComparison(t *testing.T) {
	store := SimpleValueTripleStore()
	lessThanZero := NewComparison(BuildFixedIterator(), kCompareLT, int64(0), store)
	want := []string{}
	checkIteratorContains(store, lessThanZero, want, t)
}
// TestWorkingGT expects "> 2" over 0..4 to keep 3 and 4.
func TestWorkingGT(t *testing.T) {
	store := SimpleValueTripleStore()
	greaterThanTwo := NewComparison(BuildFixedIterator(), kCompareGT, int64(2), store)
	want := []string{"3", "4"}
	checkIteratorContains(store, greaterThanTwo, want, t)
}
// TestWorkingGTE expects ">= 2" over 0..4 to keep 2, 3 and 4.
func TestWorkingGTE(t *testing.T) {
	store := SimpleValueTripleStore()
	atLeastTwo := NewComparison(BuildFixedIterator(), kCompareGTE, int64(2), store)
	want := []string{"2", "3", "4"}
	checkIteratorContains(store, atLeastTwo, want, t)
}
// TestVCICheck verifies Check on a ">= 2" comparison over the fixed values
// 0..4: a value must both pass the comparison and be present in the
// underlying iterator.
func TestVCICheck(t *testing.T) {
	store := SimpleValueTripleStore()
	vc := NewComparison(BuildFixedIterator(), kCompareGTE, int64(2), store)

	cases := []struct {
		val     int
		want    bool
		failure string
	}{
		{1, false, "1 is less than 2, should be GTE"},
		{2, true, "2 is GTE 2"},
		{3, true, "3 is GTE 2"},
		{5, false, "5 is not in the underlying iterator"},
	}
	for _, c := range cases {
		if vc.Check(c.val) != c.want {
			t.Error(c.failure)
		}
	}
}