From 1768e593a8be13f20d64d3d8b71a5c1687b6dce6 Mon Sep 17 00:00:00 2001 From: kortschak Date: Mon, 30 Jun 2014 22:22:50 +0930 Subject: [PATCH] Move iterators into separate package Also reduce API exposure and use standard library more - and fix bugs I previously introduced in mongo. --- graph/all_iterator.go | 116 --------- graph/and_iterator.go | 246 ------------------ graph/and_iterator_optimize.go | 315 ---------------------- graph/and_iterator_optimize_test.go | 110 -------- graph/and_iterator_test.go | 147 ----------- graph/fixed_iterator.go | 156 ----------- graph/hasa_iterator.go | 221 ---------------- graph/iterator.go | 164 +----------- graph/iterator/all_iterator.go | 118 +++++++++ graph/iterator/and_iterator.go | 248 ++++++++++++++++++ graph/iterator/and_iterator_optimize.go | 317 +++++++++++++++++++++++ graph/iterator/and_iterator_optimize_test.go | 110 ++++++++ graph/iterator/and_iterator_test.go | 149 +++++++++++ graph/iterator/fixed_iterator.go | 157 +++++++++++ graph/iterator/hasa_iterator.go | 223 ++++++++++++++++ graph/iterator/iterator.go | 223 ++++++++++++++++ graph/iterator/linksto_iterator.go | 183 +++++++++++++ graph/iterator/linksto_iterator_test.go | 39 +++ graph/iterator/mock_ts_test.go | 60 +++++ graph/iterator/optional_iterator.go | 137 ++++++++++ graph/iterator/or_iterator.go | 284 ++++++++++++++++++++ graph/iterator/or_iterator_test.go | 145 +++++++++++ graph/iterator/query_shape.go | 181 +++++++++++++ graph/iterator/query_shape_test.go | 126 +++++++++ graph/iterator/value_comparison_iterator.go | 190 ++++++++++++++ graph/iterator/value_comparison_iterator_test.go | 128 +++++++++ graph/leveldb/all_iterator.go | 9 +- graph/leveldb/iterator.go | 9 +- graph/leveldb/leveldb_test.go | 75 +++--- graph/leveldb/triplestore.go | 11 +- graph/leveldb/triplestore_iterator_optimize.go | 5 +- graph/linksto_iterator.go | 181 ------------- graph/linksto_iterator_test.go | 37 --- graph/memstore/all_iterator.go | 7 +- graph/memstore/iterator.go | 5 +- 
graph/memstore/testing_memstore.go | 22 +- graph/memstore/triplestore.go | 11 +- graph/memstore/triplestore_iterator_optimize.go | 5 +- graph/memstore/triplestore_test.go | 33 +-- graph/mock_ts.go | 58 ----- graph/mongo/iterator.go | 5 +- graph/mongo/triplestore.go | 22 +- graph/mongo/triplestore_iterator_optimize.go | 5 +- graph/optional_iterator.go | 135 ---------- graph/or_iterator.go | 282 -------------------- graph/or_iterator_test.go | 142 ---------- graph/query_shape.go | 177 ------------- graph/query_shape_test.go | 125 --------- graph/result_tree_evaluator.go | 8 +- graph/result_tree_evaluator_test.go | 13 +- graph/sexp/parser.go | 19 +- graph/sexp/parser_test.go | 18 +- graph/triple.go | 10 +- graph/triplestore.go | 4 +- graph/value_comparison_iterator.go | 193 -------------- graph/value_comparison_iterator_test.go | 126 --------- http/write.go | 3 +- nquads/nquads.go | 2 +- query/gremlin/build_iterator.go | 89 +++---- query/gremlin/finals.go | 3 +- query/mql/build_iterator.go | 21 +- query/mql/session.go | 7 +- 62 files changed, 3240 insertions(+), 3130 deletions(-) delete mode 100644 graph/all_iterator.go delete mode 100644 graph/and_iterator.go delete mode 100644 graph/and_iterator_optimize.go delete mode 100644 graph/and_iterator_optimize_test.go delete mode 100644 graph/and_iterator_test.go delete mode 100644 graph/fixed_iterator.go delete mode 100644 graph/hasa_iterator.go create mode 100644 graph/iterator/all_iterator.go create mode 100644 graph/iterator/and_iterator.go create mode 100644 graph/iterator/and_iterator_optimize.go create mode 100644 graph/iterator/and_iterator_optimize_test.go create mode 100644 graph/iterator/and_iterator_test.go create mode 100644 graph/iterator/fixed_iterator.go create mode 100644 graph/iterator/hasa_iterator.go create mode 100644 graph/iterator/iterator.go create mode 100644 graph/iterator/linksto_iterator.go create mode 100644 graph/iterator/linksto_iterator_test.go create mode 100644 
graph/iterator/mock_ts_test.go create mode 100644 graph/iterator/optional_iterator.go create mode 100644 graph/iterator/or_iterator.go create mode 100644 graph/iterator/or_iterator_test.go create mode 100644 graph/iterator/query_shape.go create mode 100644 graph/iterator/query_shape_test.go create mode 100644 graph/iterator/value_comparison_iterator.go create mode 100644 graph/iterator/value_comparison_iterator_test.go delete mode 100644 graph/linksto_iterator.go delete mode 100644 graph/linksto_iterator_test.go delete mode 100644 graph/mock_ts.go delete mode 100644 graph/optional_iterator.go delete mode 100644 graph/or_iterator.go delete mode 100644 graph/or_iterator_test.go delete mode 100644 graph/query_shape.go delete mode 100644 graph/query_shape_test.go delete mode 100644 graph/value_comparison_iterator.go delete mode 100644 graph/value_comparison_iterator_test.go diff --git a/graph/all_iterator.go b/graph/all_iterator.go deleted file mode 100644 index 989e1b5..0000000 --- a/graph/all_iterator.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the All iterator. Which, logically -// enough, represents all nodes or all links in the graph. -// -// This particular file is actually vestigal. It's up to the TripleStore to give -// us an All iterator that represents all things in the graph. 
So this is -// really the All iterator for the MemTripleStore. That said, it *is* one of -// the base iterators, and it helps just to see it here. - -import ( - "fmt" - "strings" -) - -// An All iterator across a range of int64 values, from `max` to `min`. -type Int64AllIterator struct { - BaseIterator - max, min int64 - at int64 -} - -// Creates a new Int64AllIterator with the given range. -func NewInt64AllIterator(min, max int64) *Int64AllIterator { - var all Int64AllIterator - BaseIteratorInit(&all.BaseIterator) - all.max = max - all.min = min - all.at = min - return &all -} - -// Start back at the beginning -func (it *Int64AllIterator) Reset() { - it.at = it.min -} - -func (it *Int64AllIterator) Close() {} - -func (it *Int64AllIterator) Clone() Iterator { - out := NewInt64AllIterator(it.min, it.max) - out.CopyTagsFrom(it) - return out -} - -// Prints the All iterator as just an "all". -func (it *Int64AllIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s tags: %v)", strings.Repeat(" ", indent), it.Type(), it.Tags()) -} - -// Next() on an Int64 all iterator is a simple incrementing counter. -// Return the next integer, and mark it as the result. -func (it *Int64AllIterator) Next() (TSVal, bool) { - NextLogIn(it) - if it.at == -1 { - return NextLogOut(it, nil, false) - } - val := it.at - it.at = it.at + 1 - if it.at > it.max { - it.at = -1 - } - it.Last = val - return NextLogOut(it, val, true) -} - -// The number of elements in an Int64AllIterator is the size of the range. -// The size is exact. -func (it *Int64AllIterator) Size() (int64, bool) { - Size := ((it.max - it.min) + 1) - return Size, true -} - -// Check() for an Int64AllIterator is merely seeing if the passed value is -// withing the range, assuming the value is an int64. 
-func (it *Int64AllIterator) Check(tsv TSVal) bool { - CheckLogIn(it, tsv) - v := tsv.(int64) - if it.min <= v && v <= it.max { - it.Last = v - return CheckLogOut(it, v, true) - } - return CheckLogOut(it, v, false) -} - -// The type of this iterator is an "all". This is important, as it puts it in -// the class of "all iterators. -func (it *Int64AllIterator) Type() string { return "all" } - -// There's nothing to optimize about this little iterator. -func (it *Int64AllIterator) Optimize() (Iterator, bool) { return it, false } - -// Stats for an Int64AllIterator are simple. Super cheap to do any operation, -// and as big as the range. -func (it *Int64AllIterator) GetStats() *IteratorStats { - s, _ := it.Size() - return &IteratorStats{ - CheckCost: 1, - NextCost: 1, - Size: s, - } -} diff --git a/graph/and_iterator.go b/graph/and_iterator.go deleted file mode 100644 index 0100708..0000000 --- a/graph/and_iterator.go +++ /dev/null @@ -1,246 +0,0 @@ -// Defines the And iterator, one of the base iterators. And requires no -// knowledge of the constituent TripleStore; its sole purpose is to act as an -// intersection operator across the subiterators it is given. If one iterator -// contains [1,3,5] and another [2,3,4] -- then And is an iterator that -// 'contains' [3] -// -// It accomplishes this in one of two ways. If it is a Next()ed iterator (that -// is, it is a top level iterator, or on the "Next() path", then it will Next() -// it's primary iterator (helpfully, and.primary_it) and Check() the resultant -// value against it's other iterators. If it matches all of them, then it -// returns that value. Otherwise, it repeats the process. -// -// If it's on a Check() path, it merely Check()s every iterator, and returns the -// logical AND of each result. - -package graph - -import ( - "fmt" - "strings" -) - -// The And iterator. Consists of a BaseIterator and a number of subiterators, the primary of which will -// be Next()ed if next is called. 
-type AndIterator struct { - BaseIterator - internalIterators []Iterator - itCount int - primaryIt Iterator - checkList []Iterator -} - -// Creates a new And iterator. -func NewAndIterator() *AndIterator { - var and AndIterator - BaseIteratorInit(&and.BaseIterator) - and.internalIterators = make([]Iterator, 0, 20) - and.checkList = nil - return &and -} - -// Reset all internal iterators -func (it *AndIterator) Reset() { - it.primaryIt.Reset() - for _, sub := range it.internalIterators { - sub.Reset() - } - it.checkList = nil -} - -func (it *AndIterator) Clone() Iterator { - and := NewAndIterator() - and.AddSubIterator(it.primaryIt.Clone()) - and.CopyTagsFrom(it) - for _, sub := range it.internalIterators { - and.AddSubIterator(sub.Clone()) - } - if it.checkList != nil { - and.optimizeCheck() - } - return and -} - -// Returns a slice of the subiterators, in order (primary iterator first). -func (it *AndIterator) GetSubIterators() []Iterator { - iters := make([]Iterator, len(it.internalIterators)+1) - iters[0] = it.primaryIt - copy(iters[1:], it.internalIterators) - return iters -} - -// Overrides BaseIterator TagResults, as it needs to add it's own results and -// recurse down it's subiterators. -func (it *AndIterator) TagResults(out *map[string]TSVal) { - it.BaseIterator.TagResults(out) - if it.primaryIt != nil { - it.primaryIt.TagResults(out) - } - for _, sub := range it.internalIterators { - sub.TagResults(out) - } -} - -// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. -func (it *AndIterator) GetResultTree() *ResultTree { - tree := NewResultTree(it.LastResult()) - tree.AddSubtree(it.primaryIt.GetResultTree()) - for _, sub := range it.internalIterators { - tree.AddSubtree(sub.GetResultTree()) - } - return tree -} - -// Prints information about this iterator. 
-func (it *AndIterator) DebugString(indent int) string { - var total string - for i, sub := range it.internalIterators { - total += strings.Repeat(" ", indent+2) - total += fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4)) - } - var tags string - for _, k := range it.Tags() { - tags += fmt.Sprintf("%s;", k) - } - spaces := strings.Repeat(" ", indent+2) - - return fmt.Sprintf("%s(%s %d\n%stags:%s\n%sprimary_it:\n%s\n%sother_its:\n%s)", - strings.Repeat(" ", indent), - it.Type(), - it.GetUid(), - spaces, - tags, - spaces, - it.primaryIt.DebugString(indent+4), - spaces, - total) -} - -// Add a subiterator to this And iterator. -// -// The first iterator that is added becomes the primary iterator. This is -// important. Calling Optimize() is the way to change the order based on -// subiterator statistics. Without Optimize(), the order added is the order -// used. -func (it *AndIterator) AddSubIterator(sub Iterator) { - if it.itCount > 0 { - it.internalIterators = append(it.internalIterators, sub) - it.itCount++ - return - } - it.primaryIt = sub - it.itCount++ -} - -// Returns the Next value from the And iterator. Because the And is the -// intersection of its subiterators, it must choose one subiterator to produce a -// candidate, and check this value against the subiterators. A productive choice -// of primary iterator is therefore very important. -func (it *AndIterator) Next() (TSVal, bool) { - NextLogIn(it) - var curr TSVal - var exists bool - for { - curr, exists = it.primaryIt.Next() - if !exists { - return NextLogOut(it, nil, false) - } - if it.checkSubIts(curr) { - it.Last = curr - return NextLogOut(it, curr, true) - } - } - panic("Somehow broke out of Next() loop in AndIterator") -} - -// Checks a value against the non-primary iterators, in order. 
-func (it *AndIterator) checkSubIts(val TSVal) bool { - var subIsGood = true - for _, sub := range it.internalIterators { - subIsGood = sub.Check(val) - if !subIsGood { - break - } - } - return subIsGood -} - -func (it *AndIterator) checkCheckList(val TSVal) bool { - ok := true - for _, c := range it.checkList { - ok = c.Check(val) - if !ok { - break - } - } - if ok { - it.Last = val - } - return CheckLogOut(it, val, ok) -} - -// Check a value against the entire iterator, in order. -func (it *AndIterator) Check(val TSVal) bool { - CheckLogIn(it, val) - if it.checkList != nil { - return it.checkCheckList(val) - } - mainGood := it.primaryIt.Check(val) - if !mainGood { - return CheckLogOut(it, val, false) - } - othersGood := it.checkSubIts(val) - if !othersGood { - return CheckLogOut(it, val, false) - } - it.Last = val - return CheckLogOut(it, val, true) -} - -// Returns the approximate size of the And iterator. Because we're dealing -// with an intersection, we know that the largest we can be is the size of the -// smallest iterator. This is the heuristic we shall follow. Better heuristics -// welcome. -func (it *AndIterator) Size() (int64, bool) { - val, b := it.primaryIt.Size() - for _, sub := range it.internalIterators { - newval, newb := sub.Size() - if val > newval { - val = newval - } - b = newb && b - } - return val, b -} - -// An And has no NextResult of its own -- that is, there are no other values -// which satisfy our previous result that are not the result itself. Our -// subiterators might, however, so just pass the call recursively. -func (it *AndIterator) NextResult() bool { - if it.primaryIt.NextResult() { - return true - } - for _, sub := range it.internalIterators { - if sub.NextResult() { - return true - } - } - return false -} - -// Perform and-specific cleanup, of which there currently is none. -func (it *AndIterator) cleanUp() {} - -// Close this iterator, and, by extension, close the subiterators. 
-// Close should be idempotent, and it follows that if it's subiterators -// follow this contract, the And follows the contract. -func (it *AndIterator) Close() { - it.cleanUp() - it.primaryIt.Close() - for _, sub := range it.internalIterators { - sub.Close() - } -} - -// Register this as an "and" iterator. -func (it *AndIterator) Type() string { return "and" } diff --git a/graph/and_iterator_optimize.go b/graph/and_iterator_optimize.go deleted file mode 100644 index 1850865..0000000 --- a/graph/and_iterator_optimize.go +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "sort" -) - -// Perhaps the most tricky file in this entire module. Really a method on the -// AndIterator, but important enough to deserve its own file. -// -// Calling Optimize() on an And iterator, like any iterator, requires that we -// preserve the underlying meaning. However, the And has many choices, namely, -// which one of it's subiterators will be the branch that does the Next()ing, -// and which ordering of the remaining iterators is the most efficient. In -// short, this is where a lot of the query optimization happens, and there are -// many wins to be had here, as well as many bad bugs. The worst class of bug -// changes the meaning of the query. The second worst class makes things really -// slow. 
-// -// The good news is this: If Optimize() is never called (turned off, perhaps) we can -// be sure the results are as good as the query language called for. -// -// In short, tread lightly. - -// Optimizes the AndIterator, by picking the most efficient way to Next() and -// Check() its subiterators. For SQL fans, this is equivalent to JOIN. -func (it *AndIterator) Optimize() (Iterator, bool) { - // First, let's get the slice of iterators, in order (first one is Next()ed, - // the rest are Check()ed) - old := it.GetSubIterators() - - // And call Optimize() on our subtree, replacing each one in the order we - // found them. it_list is the newly optimized versions of these, and changed - // is another list, of only the ones that have returned replacements and - // changed. - its := optimizeSubIterators(old) - - // Close the replaced iterators (they ought to close themselves, but Close() - // is idempotent, so this just protects against any machinations). - closeIteratorList(old, nil) - - // If we can find only one subiterator which is equivalent to this whole and, - // we can replace the And... - out := it.optimizeReplacement(its) - if out != nil { - // ...Move the tags to the replacement... - moveTagsTo(out, it) - // ...Close everyone except `out`, our replacement... - closeIteratorList(its, out) - // ...And return it. - return out, true - } - - // And now, without changing any of the iterators, we reorder them. it_list is - // now a permutation of itself, but the contents are unchanged. - its = optimizeOrder(its) - - // Okay! At this point we have an optimized order. - - // The easiest thing to do at this point is merely to create a new And iterator - // and replace ourselves with our (reordered, optimized) clone. - newAnd := NewAndIterator() - - // Add the subiterators in order. - for _, sub := range its { - newAnd.AddSubIterator(sub) - } - - // Move the tags hanging on us (like any good replacement). 
- newAnd.CopyTagsFrom(it) - - newAnd.optimizeCheck() - - // And close ourselves but not our subiterators -- some may still be alive in - // the new And (they were unchanged upon calling Optimize() on them, at the - // start). - it.cleanUp() - return newAnd, true -} - -// Closes a list of iterators, except the one passed in `except`. Closes all -// of the iterators in the list if `except` is nil. -func closeIteratorList(its []Iterator, except Iterator) { - for _, it := range its { - if it != except { - it.Close() - } - } -} - -// Find if there is a single subiterator which is a valid replacement for this -// AndIterator. -func (_ *AndIterator) optimizeReplacement(its []Iterator) Iterator { - // If we were created with no SubIterators, we're as good as Null. - if len(its) == 0 { - return &NullIterator{} - } - if len(its) == 1 { - // When there's only one iterator, there's only one choice. - return its[0] - } - // If any of our subiterators, post-optimization, are also Null, then - // there's no point in continuing the branch, we will have no results - // and we are null as well. - if hasAnyNullIterators(its) { - return &NullIterator{} - } - - // If we have one useful iterator, use that. - it := hasOneUsefulIterator(its) - if it != nil { - return it - } - return nil -} - -// optimizeOrder(l) takes a list and returns a list, containing the same contents -// but with a new ordering, however it wishes. -func optimizeOrder(its []Iterator) []Iterator { - var ( - // bad contains iterators that can't be (efficiently) nexted, such as - // "optional" or "not". Separate them out and tack them on at the end. - out, bad []Iterator - best Iterator - bestCost = int64(1 << 62) - ) - - // Find the iterator with the projected "best" total cost. - // Total cost is defined as The Next()ed iterator's cost to Next() out - // all of it's contents, and to Check() each of those against everyone - // else. 
- for _, it := range its { - if !it.Nextable() { - bad = append(bad, it) - continue - } - rootStats := it.GetStats() - cost := rootStats.NextCost - for _, f := range its { - if !f.Nextable() { - continue - } - if f == it { - continue - } - stats := f.GetStats() - cost += stats.CheckCost - } - cost *= rootStats.Size - if cost < bestCost { - best = it - bestCost = cost - } - } - - // TODO(barakmich): Optimization of order need not stop here. Picking a smart - // Check() order based on probability of getting a false Check() first is - // useful (fail faster). - - // Put the best iterator (the one we wish to Next()) at the front... - out = append(out, best) - - // ... push everyone else after... - for _, it := range its { - if !it.Nextable() { - continue - } - if it != best { - out = append(out, it) - } - } - - // ...and finally, the difficult children on the end. - return append(out, bad...) -} - -type byCost []Iterator - -func (c byCost) Len() int { return len(c) } -func (c byCost) Less(i, j int) bool { return c[i].GetStats().CheckCost < c[j].GetStats().CheckCost } -func (c byCost) Swap(i, j int) { c[i], c[j] = c[j], c[i] } - -// optimizeCheck(l) creates an alternate check list, containing the same contents -// but with a new ordering, however it wishes. -func (it *AndIterator) optimizeCheck() { - // GetSubIterators allocates, so this is currently safe. - // TODO(kortschak) Reuse it.checkList if possible. - // This involves providing GetSubIterators with a slice to fill. - // Generally this is a worthwhile thing to do in other places as well. - it.checkList = it.GetSubIterators() - sort.Sort(byCost(it.checkList)) -} - -// If we're replacing ourselves by a single iterator, we need to grab the -// result tags from the iterators that, while still valid and would hold -// the same values as this and, are not going to stay. -// getSubTags() returns a map of the tags for all the subiterators. 
-func (it *AndIterator) getSubTags() map[string]struct{} { - tags := make(map[string]struct{}) - for _, sub := range it.GetSubIterators() { - for _, tag := range sub.Tags() { - tags[tag] = struct{}{} - } - } - for _, tag := range it.Tags() { - tags[tag] = struct{}{} - } - return tags -} - -// moveTagsTo() gets the tags for all of the src's subiterators and the -// src itself, and moves them to dst. -func moveTagsTo(dst Iterator, src *AndIterator) { - tags := src.getSubTags() - for _, tag := range dst.Tags() { - if _, ok := tags[tag]; ok { - delete(tags, tag) - } - } - for k := range tags { - dst.AddTag(k) - } -} - -// optimizeSubIterators(l) takes a list of iterators and calls Optimize() on all -// of them. It returns two lists -- the first contains the same list as l, where -// any replacements are made by Optimize() and the second contains the originals -// which were replaced. -func optimizeSubIterators(its []Iterator) []Iterator { - var optIts []Iterator - for _, it := range its { - o, changed := it.Optimize() - if changed { - optIts = append(optIts, o) - } else { - optIts = append(optIts, it.Clone()) - } - } - return optIts -} - -// Check a list of iterators for any Null iterators. -func hasAnyNullIterators(its []Iterator) bool { - for _, it := range its { - if it.Type() == "null" { - return true - } - } - return false -} - -// There are two "not-useful" iterators -- namely "null" which returns -// nothing, and "all" which returns everything. Particularly, we want -// to see if we're intersecting with a bunch of "all" iterators, and, -// if we are, then we have only one useful iterator. -func hasOneUsefulIterator(its []Iterator) Iterator { - usefulCount := 0 - var usefulIt Iterator - for _, it := range its { - switch it.Type() { - case "null", "all": - continue - case "optional": - // Optional is weird -- it's not useful, but we can't optimize - // away from it. Therefore, we skip this optimization - // if we see one. 
- return nil - default: - usefulCount++ - usefulIt = it - } - } - - if usefulCount == 1 { - return usefulIt - } - return nil -} - -// and.GetStats() lives here in and-iterator-optimize.go because it may -// in the future return different statistics based on how it is optimized. -// For now, however, it's pretty static. -func (it *AndIterator) GetStats() *IteratorStats { - primaryStats := it.primaryIt.GetStats() - CheckCost := primaryStats.CheckCost - NextCost := primaryStats.NextCost - Size := primaryStats.Size - for _, sub := range it.internalIterators { - stats := sub.GetStats() - NextCost += stats.CheckCost - CheckCost += stats.CheckCost - if Size > stats.Size { - Size = stats.Size - } - } - return &IteratorStats{ - CheckCost: CheckCost, - NextCost: NextCost, - Size: Size, - } - -} diff --git a/graph/and_iterator_optimize_test.go b/graph/and_iterator_optimize_test.go deleted file mode 100644 index 8f8955b..0000000 --- a/graph/and_iterator_optimize_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Tests relating to methods in and-iterator-optimize. Many are pretty simplistic, but -// nonetheless cover a lot of basic cases. 
- -import ( - "reflect" - "sort" - "testing" -) - -func TestIteratorPromotion(t *testing.T) { - all := NewInt64AllIterator(1, 3) - fixed := newFixedIterator() - fixed.AddValue(3) - a := NewAndIterator() - a.AddSubIterator(all) - a.AddSubIterator(fixed) - all.AddTag("a") - fixed.AddTag("b") - a.AddTag("c") - newIt, changed := a.Optimize() - if !changed { - t.Error("Iterator didn't optimize") - } - if newIt.Type() != "fixed" { - t.Error("Expected fixed iterator") - } - tagsExpected := []string{"a", "b", "c"} - tags := newIt.Tags() - sort.Strings(tags) - if !reflect.DeepEqual(tags, tagsExpected) { - t.Fatal("Tags don't match") - } -} - -func TestNullIteratorAnd(t *testing.T) { - all := NewInt64AllIterator(1, 3) - null := NewNullIterator() - a := NewAndIterator() - a.AddSubIterator(all) - a.AddSubIterator(null) - newIt, changed := a.Optimize() - if !changed { - t.Error("Didn't change") - } - if newIt.Type() != "null" { - t.Error("Expected null iterator, got ", newIt.Type()) - } -} - -func TestReorderWithTag(t *testing.T) { - all := NewInt64AllIterator(100, 300) - all.AddTag("good") - all2 := NewInt64AllIterator(1, 30000) - all2.AddTag("slow") - a := NewAndIterator() - // Make all2 the default iterator - a.AddSubIterator(all2) - a.AddSubIterator(all) - - newIt, changed := a.Optimize() - if !changed { - t.Error("Expected new iterator") - } - expectedTags := []string{"good", "slow"} - tagsOut := make([]string, 0) - for _, sub := range newIt.GetSubIterators() { - for _, x := range sub.Tags() { - tagsOut = append(tagsOut, x) - } - } - if !reflect.DeepEqual(expectedTags, tagsOut) { - t.Fatal("Tags don't match") - } -} - -func TestAndStatistics(t *testing.T) { - all := NewInt64AllIterator(100, 300) - all.AddTag("good") - all2 := NewInt64AllIterator(1, 30000) - all2.AddTag("slow") - a := NewAndIterator() - // Make all2 the default iterator - a.AddSubIterator(all2) - a.AddSubIterator(all) - stats1 := a.GetStats() - newIt, changed := a.Optimize() - if !changed { - 
t.Error("Didn't optimize") - } - stats2 := newIt.GetStats() - if stats2.NextCost > stats1.NextCost { - t.Error("And didn't optimize. Next cost old ", stats1.NextCost, "and new ", stats2.NextCost) - } -} diff --git a/graph/and_iterator_test.go b/graph/and_iterator_test.go deleted file mode 100644 index d0fbf2e..0000000 --- a/graph/and_iterator_test.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "testing" -) - -// Make sure that tags work on the And. -func TestTag(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(234) - fix1.AddTag("foo") - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddTag("bar") - out := fix1.Tags() - if len(out) != 1 { - t.Errorf("Expected length 1, got %d", len(out)) - } - if out[0] != "foo" { - t.Errorf("Cannot get tag back, got %s", out[0]) - } - - val, ok := and.Next() - if !ok { - t.Errorf("And did not next") - } - if val != 234 { - t.Errorf("Unexpected value") - } - tags := make(map[string]TSVal) - and.TagResults(&tags) - if tags["bar"] != 234 { - t.Errorf("no bar tag") - } - if tags["foo"] != 234 { - t.Errorf("no foo tag") - } -} - -// Do a simple itersection of fixed values. 
-func TestAndAndFixedIterators(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(1) - fix1.AddValue(2) - fix1.AddValue(3) - fix1.AddValue(4) - fix2 := newFixedIterator() - fix2.AddValue(3) - fix2.AddValue(4) - fix2.AddValue(5) - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddSubIterator(fix2) - // Should be as big as smallest subiterator - size, accurate := and.Size() - if size != 3 { - t.Error("Incorrect size") - } - if !accurate { - t.Error("not accurate") - } - - val, ok := and.Next() - if val != 3 || ok == false { - t.Error("Incorrect first value") - } - - val, ok = and.Next() - if val != 4 || ok == false { - t.Error("Incorrect second value") - } - - val, ok = and.Next() - if ok { - t.Error("Too many values") - } - -} - -// If there's no intersection, the size should still report the same, -// but there should be nothing to Next() -func TestNonOverlappingFixedIterators(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(1) - fix1.AddValue(2) - fix1.AddValue(3) - fix1.AddValue(4) - fix2 := newFixedIterator() - fix2.AddValue(5) - fix2.AddValue(6) - fix2.AddValue(7) - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddSubIterator(fix2) - // Should be as big as smallest subiterator - size, accurate := and.Size() - if size != 3 { - t.Error("Incorrect size") - } - if !accurate { - t.Error("not accurate") - } - - _, ok := and.Next() - if ok { - t.Error("Too many values") - } - -} - -func TestAllIterators(t *testing.T) { - all1 := NewInt64AllIterator(1, 5) - all2 := NewInt64AllIterator(4, 10) - and := NewAndIterator() - and.AddSubIterator(all2) - and.AddSubIterator(all1) - - val, ok := and.Next() - if val.(int64) != 4 || ok == false { - t.Error("Incorrect first value") - } - - val, ok = and.Next() - if val.(int64) != 5 || ok == false { - t.Error("Incorrect second value") - } - - val, ok = and.Next() - if ok { - t.Error("Too many values") - } - -} diff --git a/graph/fixed_iterator.go b/graph/fixed_iterator.go deleted file mode 
100644 index 3d9eeb4..0000000 --- a/graph/fixed_iterator.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the Fixed iterator. A fixed iterator is quite simple; it -// contains an explicit fixed array of values. -// -// A fixed iterator requires an Equality function to be passed to it, by reason that TSVal, the -// opaque Triple store value, may not answer to ==. - -import ( - "fmt" - "strings" -) - -// A Fixed iterator consists of it's values, an index (where it is in the process of Next()ing) and -// an equality function. -type FixedIterator struct { - BaseIterator - values []TSVal - lastIndex int - cmp Equality -} - -// Define the signature of an equality function. -type Equality func(a, b TSVal) bool - -// Define an equality function of purely ==, which works for native types. -func BasicEquality(a, b TSVal) bool { - if a == b { - return true - } - return false -} - -// Creates a new Fixed iterator based around == equality. -func newFixedIterator() *FixedIterator { - return NewFixedIteratorWithCompare(BasicEquality) -} - -// Creates a new Fixed iterator with a custom comparitor. 
-func NewFixedIteratorWithCompare(compareFn Equality) *FixedIterator { - var it FixedIterator - BaseIteratorInit(&it.BaseIterator) - it.values = make([]TSVal, 0, 20) - it.lastIndex = 0 - it.cmp = compareFn - return &it -} - -func (it *FixedIterator) Reset() { - it.lastIndex = 0 -} - -func (it *FixedIterator) Close() {} - -func (it *FixedIterator) Clone() Iterator { - out := NewFixedIteratorWithCompare(it.cmp) - for _, val := range it.values { - out.AddValue(val) - } - out.CopyTagsFrom(it) - return out -} - -// Add a value to the iterator. The array now contains this value. -// TODO(barakmich): This ought to be a set someday, disallowing repeated values. -func (it *FixedIterator) AddValue(v TSVal) { - it.values = append(it.values, v) -} - -// Print some information about the iterator. -func (it *FixedIterator) DebugString(indent int) string { - value := "" - if len(it.values) > 0 { - value = fmt.Sprint(it.values[0]) - } - return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", - strings.Repeat(" ", indent), - it.Type(), - it.FixedTags(), - len(it.values), - value, - ) -} - -// Register this iterator as a Fixed iterator. -func (it *FixedIterator) Type() string { - return "fixed" -} - -// Check if the passed value is equal to one of the values stored in the iterator. -func (it *FixedIterator) Check(v TSVal) bool { - // Could be optimized by keeping it sorted or using a better datastructure. - // However, for fixed iterators, which are by definition kind of tiny, this - // isn't a big issue. - CheckLogIn(it, v) - for _, x := range it.values { - if it.cmp(x, v) { - it.Last = x - return CheckLogOut(it, v, true) - } - } - return CheckLogOut(it, v, false) -} - -// Return the next stored value from the iterator. 
-func (it *FixedIterator) Next() (TSVal, bool) { - NextLogIn(it) - if it.lastIndex == len(it.values) { - return NextLogOut(it, nil, false) - } - out := it.values[it.lastIndex] - it.Last = out - it.lastIndex++ - return NextLogOut(it, out, true) -} - -// Optimize() for a Fixed iterator is simple. Returns a Null iterator if it's empty -// (so that other iterators upstream can treat this as null) or there is no -// optimization. -func (it *FixedIterator) Optimize() (Iterator, bool) { - - if len(it.values) == 1 && it.values[0] == nil { - return &NullIterator{}, true - } - - return it, false -} - -// Size is the number of values stored. -func (it *FixedIterator) Size() (int64, bool) { - return int64(len(it.values)), true -} - -// As we right now have to scan the entire list, Next and Check are linear with the -// size. However, a better data structure could remove these limits. -func (it *FixedIterator) GetStats() *IteratorStats { - return &IteratorStats{ - CheckCost: int64(len(it.values)), - NextCost: int64(len(it.values)), - Size: int64(len(it.values)), - } -} diff --git a/graph/hasa_iterator.go b/graph/hasa_iterator.go deleted file mode 100644 index 16088c8..0000000 --- a/graph/hasa_iterator.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the HasA iterator. 
The HasA takes a -// subiterator of links, and acts as an iterator of nodes in the given -// direction. The name comes from the idea that a "link HasA subject" or a "link -// HasA predicate". -// -// HasA is weird in that it may return the same value twice if on the Next() -// path. That's okay -- in reality, it can be viewed as returning the value for -// a new triple, but to make logic much simpler, here we have the HasA. -// -// Likewise, it's important to think about Check()ing a HasA. When given a -// value to check, it means "Check all predicates that have this value for your -// direction against the subiterator." This would imply that there's more than -// one possibility for the same Check()ed value. While we could return the -// number of options, it's simpler to return one, and then call NextResult() -// enough times to enumerate the options. (In fact, one could argue that the -// raison d'etre for NextResult() is this iterator). -// -// Alternatively, can be seen as the dual of the LinksTo iterator. - -import ( - "fmt" - "strings" - - "github.com/barakmich/glog" -) - -// A HasaIterator consists of a reference back to the TripleStore that it references, -// a primary subiterator, a direction in which the triples for that subiterator point, -// and a temporary holder for the iterator generated on Check(). -type HasaIterator struct { - BaseIterator - ts TripleStore - primaryIt Iterator - dir Direction - resultIt Iterator -} - -// Construct a new HasA iterator, given the triple subiterator, and the triple -// direction for which it stands. -func NewHasaIterator(ts TripleStore, subIt Iterator, d Direction) *HasaIterator { - var hasa HasaIterator - BaseIteratorInit(&hasa.BaseIterator) - hasa.ts = ts - hasa.primaryIt = subIt - hasa.dir = d - return &hasa -} - -// Return our sole subiterator. 
-func (it *HasaIterator) GetSubIterators() []Iterator { - return []Iterator{it.primaryIt} -} - -func (it *HasaIterator) Reset() { - it.primaryIt.Reset() - if it.resultIt != nil { - it.resultIt.Close() - } -} - -func (it *HasaIterator) Clone() Iterator { - out := NewHasaIterator(it.ts, it.primaryIt.Clone(), it.dir) - out.CopyTagsFrom(it) - return out -} - -// Direction accessor. -func (it *HasaIterator) Direction() Direction { return it.dir } - -// Pass the Optimize() call along to the subiterator. If it becomes Null, -// then the HasA becomes Null (there are no triples that have any directions). -func (it *HasaIterator) Optimize() (Iterator, bool) { - newPrimary, changed := it.primaryIt.Optimize() - if changed { - it.primaryIt = newPrimary - if it.primaryIt.Type() == "null" { - return it.primaryIt, true - } - } - return it, false -} - -// Pass the TagResults down the chain. -func (it *HasaIterator) TagResults(out *map[string]TSVal) { - it.BaseIterator.TagResults(out) - it.primaryIt.TagResults(out) -} - -// DEPRECATED Return results in a ResultTree. -func (it *HasaIterator) GetResultTree() *ResultTree { - tree := NewResultTree(it.LastResult()) - tree.AddSubtree(it.primaryIt.GetResultTree()) - return tree -} - -// Print some information about this iterator. -func (it *HasaIterator) DebugString(indent int) string { - var tags string - for _, k := range it.Tags() { - tags += fmt.Sprintf("%s;", k) - } - return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", strings.Repeat(" ", indent), it.Type(), it.GetUid(), tags, it.dir, it.primaryIt.DebugString(indent+4)) -} - -// Check a value against our internal iterator. In order to do this, we must first open a new -// iterator of "triples that have `val` in our direction", given to us by the triple store, -// and then Next() values out of that iterator and Check() them against our subiterator. 
-func (it *HasaIterator) Check(val TSVal) bool { - CheckLogIn(it, val) - if glog.V(4) { - glog.V(4).Infoln("Id is", it.ts.GetNameFor(val)) - } - // TODO(barakmich): Optimize this - if it.resultIt != nil { - it.resultIt.Close() - } - it.resultIt = it.ts.GetTripleIterator(it.dir, val) - return CheckLogOut(it, val, it.GetCheckResult()) -} - -// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the -// result iterator (a triple iterator based on the last checked value) and returns true if -// another match is made. -func (it *HasaIterator) GetCheckResult() bool { - for { - linkVal, ok := it.resultIt.Next() - if !ok { - break - } - if glog.V(4) { - glog.V(4).Infoln("Triple is", it.ts.GetTriple(linkVal).ToString()) - } - if it.primaryIt.Check(linkVal) { - it.Last = it.ts.GetTripleDirection(linkVal, it.dir) - return true - } - } - return false -} - -// Get the next result that matches this branch. -func (it *HasaIterator) NextResult() bool { - // Order here is important. If the subiterator has a NextResult, then we - // need do nothing -- there is a next result, and we shouldn't move forward. - // However, we then need to get the next result from our last Check(). - // - // The upshot is, the end of NextResult() bubbles up from the bottom of the - // iterator tree up, and we need to respect that. - if it.primaryIt.NextResult() { - return true - } - return it.GetCheckResult() -} - -// Get the next result from this iterator. This is simpler than Check. We have a -// subiterator we can get a value from, and we can take that resultant triple, -// pull our direction out of it, and return that. 
-func (it *HasaIterator) Next() (TSVal, bool) { - NextLogIn(it) - if it.resultIt != nil { - it.resultIt.Close() - } - it.resultIt = &NullIterator{} - - tID, ok := it.primaryIt.Next() - if !ok { - return NextLogOut(it, 0, false) - } - name := it.ts.GetTriple(tID).Get(it.dir) - val := it.ts.GetIdFor(name) - it.Last = val - return NextLogOut(it, val, true) -} - -// GetStats() returns the statistics on the HasA iterator. This is curious. Next -// cost is easy, it's an extra call or so on top of the subiterator Next cost. -// CheckCost involves going to the TripleStore, iterating out values, and hoping -// one sticks -- potentially expensive, depending on fanout. Size, however, is -// potentially smaller. we know at worst it's the size of the subiterator, but -// if there are many repeated values, it could be much smaller in totality. -func (it *HasaIterator) GetStats() *IteratorStats { - subitStats := it.primaryIt.GetStats() - // TODO(barakmich): These should really come from the triplestore itself - // and be optimized. - faninFactor := int64(1) - fanoutFactor := int64(30) - nextConstant := int64(2) - tripleConstant := int64(1) - return &IteratorStats{ - NextCost: tripleConstant + subitStats.NextCost, - CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost, - Size: faninFactor * subitStats.Size, - } -} - -// Close the subiterator, the result iterator (if any) and the HasA. -func (it *HasaIterator) Close() { - if it.resultIt != nil { - it.resultIt.Close() - } - it.primaryIt.Close() -} - -// Register this iterator as a HasA. -func (it *HasaIterator) Type() string { return "hasa" } diff --git a/graph/iterator.go b/graph/iterator.go index 0bffb01..fc17eb7 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -18,7 +18,6 @@ package graph // iterators can "inherit" from to get default iterator functionality. 
import ( - "fmt" "strings" "github.com/barakmich/glog" @@ -104,170 +103,17 @@ type Iterator interface { GetUid() int } +type FixedIterator interface { + Iterator + AddValue(TSVal) +} + type IteratorStats struct { CheckCost int64 NextCost int64 Size int64 } -// The Base iterator is the iterator other iterators inherit from to get some -// default functionality. -type BaseIterator struct { - Last TSVal - tags []string - fixedTags map[string]TSVal - nextable bool - uid int -} - -// Called by subclases. -func BaseIteratorInit(it *BaseIterator) { - // Your basic iterator is nextable - it.nextable = true - it.uid = iterator_n - if glog.V(2) { - iterator_n++ - } -} - -func (it *BaseIterator) GetUid() int { - return it.uid -} - -// Adds a tag to the iterator. Most iterators don't need to override. -func (it *BaseIterator) AddTag(tag string) { - if it.tags == nil { - it.tags = make([]string, 0) - } - it.tags = append(it.tags, tag) -} - -func (it *BaseIterator) AddFixedTag(tag string, value TSVal) { - if it.fixedTags == nil { - it.fixedTags = make(map[string]TSVal) - } - it.fixedTags[tag] = value -} - -// Returns the tags. -func (it *BaseIterator) Tags() []string { - return it.tags -} - -func (it *BaseIterator) FixedTags() map[string]TSVal { - return it.fixedTags -} - -func (it *BaseIterator) CopyTagsFrom(other_it Iterator) { - for _, tag := range other_it.Tags() { - it.AddTag(tag) - } - - for k, v := range other_it.FixedTags() { - it.AddFixedTag(k, v) - } - -} - -// Prints a silly debug string. Most classes override. -func (it *BaseIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(base)", strings.Repeat(" ", indent)) -} - -// Nothing in a base iterator. -func (it *BaseIterator) Check(v TSVal) bool { - return false -} - -// Base iterators should never appear in a tree if they are, select against -// them. 
-func (it *BaseIterator) GetStats() *IteratorStats { - return &IteratorStats{100000, 100000, 100000} -} - -// DEPRECATED -func (it *BaseIterator) GetResultTree() *ResultTree { - tree := NewResultTree(it.LastResult()) - return tree -} - -// Nothing in a base iterator. -func (it *BaseIterator) Next() (TSVal, bool) { - return nil, false -} - -func (it *BaseIterator) NextResult() bool { - return false -} - -// Returns the last result of an iterator. -func (it *BaseIterator) LastResult() TSVal { - return it.Last -} - -// If you're empty and you know it, clap your hands. -func (it *BaseIterator) Size() (int64, bool) { - return 0, true -} - -// No subiterators. Only those with subiterators need to do anything here. -func (it *BaseIterator) GetSubIterators() []Iterator { - return nil -} - -// Accessor -func (it *BaseIterator) Nextable() bool { return it.nextable } - -// Fill the map based on the tags assigned to this iterator. Default -// functionality works well for most iterators. -func (it *BaseIterator) TagResults(out_map *map[string]TSVal) { - for _, tag := range it.Tags() { - (*out_map)[tag] = it.LastResult() - } - - for tag, value := range it.FixedTags() { - (*out_map)[tag] = value - } -} - -// Nothing to clean up. -// func (it *BaseIterator) Close() {} - -func (it *NullIterator) Close() {} - -func (it *BaseIterator) Reset() {} - -// Here we define the simplest base iterator -- the Null iterator. It contains nothing. -// It is the empty set. Often times, queries that contain one of these match nothing, -// so it's important to give it a special iterator. -type NullIterator struct { - BaseIterator -} - -// Fairly useless New function. -func NewNullIterator() *NullIterator { - return &NullIterator{} -} - -func (it *NullIterator) Clone() Iterator { return NewNullIterator() } - -// Name the null iterator. -func (it *NullIterator) Type() string { return "null" } - -// A good iterator will close itself when it returns true. -// Null has nothing it needs to do. 
-func (it *NullIterator) Optimize() (Iterator, bool) { return it, false } - -// Print the null iterator. -func (it *NullIterator) DebugString(indent int) string { - return strings.Repeat(" ", indent) + "(null)" -} - -// A null iterator costs nothing. Use it! -func (it *NullIterator) GetStats() *IteratorStats { - return &IteratorStats{0, 0, 0} -} - // Utility logging functions for when an iterator gets called Next upon, or Check upon, as // well as what they return. Highly useful for tracing the execution path of a query. func CheckLogIn(it Iterator, val TSVal) { diff --git a/graph/iterator/all_iterator.go b/graph/iterator/all_iterator.go new file mode 100644 index 0000000..327ee77 --- /dev/null +++ b/graph/iterator/all_iterator.go @@ -0,0 +1,118 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Defines one of the base iterators, the All iterator. Which, logically +// enough, represents all nodes or all links in the graph. +// +// This particular file is actually vestigal. It's up to the TripleStore to give +// us an All iterator that represents all things in the graph. So this is +// really the All iterator for the MemTripleStore. That said, it *is* one of +// the base iterators, and it helps just to see it here. + +import ( + "fmt" + "strings" + + "github.com/google/cayley/graph" +) + +// An All iterator across a range of int64 values, from `max` to `min`. 
+type Int64 struct { + Base + max, min int64 + at int64 +} + +// Creates a new Int64 with the given range. +func NewInt64(min, max int64) *Int64 { + var all Int64 + BaseInit(&all.Base) + all.max = max + all.min = min + all.at = min + return &all +} + +// Start back at the beginning +func (it *Int64) Reset() { + it.at = it.min +} + +func (it *Int64) Close() {} + +func (it *Int64) Clone() graph.Iterator { + out := NewInt64(it.min, it.max) + out.CopyTagsFrom(it) + return out +} + +// Prints the All iterator as just an "all". +func (it *Int64) DebugString(indent int) string { + return fmt.Sprintf("%s(%s tags: %v)", strings.Repeat(" ", indent), it.Type(), it.Tags()) +} + +// Next() on an Int64 all iterator is a simple incrementing counter. +// Return the next integer, and mark it as the result. +func (it *Int64) Next() (graph.TSVal, bool) { + NextLogIn(it) + if it.at == -1 { + return NextLogOut(it, nil, false) + } + val := it.at + it.at = it.at + 1 + if it.at > it.max { + it.at = -1 + } + it.Last = val + return NextLogOut(it, val, true) +} + +// The number of elements in an Int64 is the size of the range. +// The size is exact. +func (it *Int64) Size() (int64, bool) { + Size := ((it.max - it.min) + 1) + return Size, true +} + +// Check() for an Int64 is merely seeing if the passed value is +// withing the range, assuming the value is an int64. +func (it *Int64) Check(tsv graph.TSVal) bool { + CheckLogIn(it, tsv) + v := tsv.(int64) + if it.min <= v && v <= it.max { + it.Last = v + return CheckLogOut(it, v, true) + } + return CheckLogOut(it, v, false) +} + +// The type of this iterator is an "all". This is important, as it puts it in +// the class of "all iterators. +func (it *Int64) Type() string { return "all" } + +// There's nothing to optimize about this little iterator. +func (it *Int64) Optimize() (graph.Iterator, bool) { return it, false } + +// Stats for an Int64 are simple. Super cheap to do any operation, +// and as big as the range. 
+func (it *Int64) GetStats() *graph.IteratorStats { + s, _ := it.Size() + return &graph.IteratorStats{ + CheckCost: 1, + NextCost: 1, + Size: s, + } +} diff --git a/graph/iterator/and_iterator.go b/graph/iterator/and_iterator.go new file mode 100644 index 0000000..c4df4ff --- /dev/null +++ b/graph/iterator/and_iterator.go @@ -0,0 +1,248 @@ +// Defines the And iterator, one of the base iterators. And requires no +// knowledge of the constituent TripleStore; its sole purpose is to act as an +// intersection operator across the subiterators it is given. If one iterator +// contains [1,3,5] and another [2,3,4] -- then And is an iterator that +// 'contains' [3] +// +// It accomplishes this in one of two ways. If it is a Next()ed iterator (that +// is, it is a top level iterator, or on the "Next() path", then it will Next() +// it's primary iterator (helpfully, and.primary_it) and Check() the resultant +// value against it's other iterators. If it matches all of them, then it +// returns that value. Otherwise, it repeats the process. +// +// If it's on a Check() path, it merely Check()s every iterator, and returns the +// logical AND of each result. + +package iterator + +import ( + "fmt" + "strings" + + "github.com/google/cayley/graph" +) + +// The And iterator. Consists of a Base and a number of subiterators, the primary of which will +// be Next()ed if next is called. +type And struct { + Base + internalIterators []graph.Iterator + itCount int + primaryIt graph.Iterator + checkList []graph.Iterator +} + +// Creates a new And iterator. 
+func NewAnd() *And { + var and And + BaseInit(&and.Base) + and.internalIterators = make([]graph.Iterator, 0, 20) + and.checkList = nil + return &and +} + +// Reset all internal iterators +func (it *And) Reset() { + it.primaryIt.Reset() + for _, sub := range it.internalIterators { + sub.Reset() + } + it.checkList = nil +} + +func (it *And) Clone() graph.Iterator { + and := NewAnd() + and.AddSubIterator(it.primaryIt.Clone()) + and.CopyTagsFrom(it) + for _, sub := range it.internalIterators { + and.AddSubIterator(sub.Clone()) + } + if it.checkList != nil { + and.optimizeCheck() + } + return and +} + +// Returns a slice of the subiterators, in order (primary iterator first). +func (it *And) GetSubIterators() []graph.Iterator { + iters := make([]graph.Iterator, len(it.internalIterators)+1) + iters[0] = it.primaryIt + copy(iters[1:], it.internalIterators) + return iters +} + +// Overrides Base TagResults, as it needs to add it's own results and +// recurse down it's subiterators. +func (it *And) TagResults(out *map[string]graph.TSVal) { + it.Base.TagResults(out) + if it.primaryIt != nil { + it.primaryIt.TagResults(out) + } + for _, sub := range it.internalIterators { + sub.TagResults(out) + } +} + +// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. +func (it *And) GetResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.LastResult()) + tree.AddSubtree(it.primaryIt.GetResultTree()) + for _, sub := range it.internalIterators { + tree.AddSubtree(sub.GetResultTree()) + } + return tree +} + +// Prints information about this iterator. 
+func (it *And) DebugString(indent int) string { + var total string + for i, sub := range it.internalIterators { + total += strings.Repeat(" ", indent+2) + total += fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4)) + } + var tags string + for _, k := range it.Tags() { + tags += fmt.Sprintf("%s;", k) + } + spaces := strings.Repeat(" ", indent+2) + + return fmt.Sprintf("%s(%s %d\n%stags:%s\n%sprimary_it:\n%s\n%sother_its:\n%s)", + strings.Repeat(" ", indent), + it.Type(), + it.GetUid(), + spaces, + tags, + spaces, + it.primaryIt.DebugString(indent+4), + spaces, + total) +} + +// Add a subiterator to this And iterator. +// +// The first iterator that is added becomes the primary iterator. This is +// important. Calling Optimize() is the way to change the order based on +// subiterator statistics. Without Optimize(), the order added is the order +// used. +func (it *And) AddSubIterator(sub graph.Iterator) { + if it.itCount > 0 { + it.internalIterators = append(it.internalIterators, sub) + it.itCount++ + return + } + it.primaryIt = sub + it.itCount++ +} + +// Returns the Next value from the And iterator. Because the And is the +// intersection of its subiterators, it must choose one subiterator to produce a +// candidate, and check this value against the subiterators. A productive choice +// of primary iterator is therefore very important. +func (it *And) Next() (graph.TSVal, bool) { + NextLogIn(it) + var curr graph.TSVal + var exists bool + for { + curr, exists = it.primaryIt.Next() + if !exists { + return NextLogOut(it, nil, false) + } + if it.checkSubIts(curr) { + it.Last = curr + return NextLogOut(it, curr, true) + } + } + panic("Somehow broke out of Next() loop in And") +} + +// Checks a value against the non-primary iterators, in order. 
+func (it *And) checkSubIts(val graph.TSVal) bool { + var subIsGood = true + for _, sub := range it.internalIterators { + subIsGood = sub.Check(val) + if !subIsGood { + break + } + } + return subIsGood +} + +func (it *And) checkCheckList(val graph.TSVal) bool { + ok := true + for _, c := range it.checkList { + ok = c.Check(val) + if !ok { + break + } + } + if ok { + it.Last = val + } + return CheckLogOut(it, val, ok) +} + +// Check a value against the entire iterator, in order. +func (it *And) Check(val graph.TSVal) bool { + CheckLogIn(it, val) + if it.checkList != nil { + return it.checkCheckList(val) + } + mainGood := it.primaryIt.Check(val) + if !mainGood { + return CheckLogOut(it, val, false) + } + othersGood := it.checkSubIts(val) + if !othersGood { + return CheckLogOut(it, val, false) + } + it.Last = val + return CheckLogOut(it, val, true) +} + +// Returns the approximate size of the And iterator. Because we're dealing +// with an intersection, we know that the largest we can be is the size of the +// smallest iterator. This is the heuristic we shall follow. Better heuristics +// welcome. +func (it *And) Size() (int64, bool) { + val, b := it.primaryIt.Size() + for _, sub := range it.internalIterators { + newval, newb := sub.Size() + if val > newval { + val = newval + } + b = newb && b + } + return val, b +} + +// An And has no NextResult of its own -- that is, there are no other values +// which satisfy our previous result that are not the result itself. Our +// subiterators might, however, so just pass the call recursively. +func (it *And) NextResult() bool { + if it.primaryIt.NextResult() { + return true + } + for _, sub := range it.internalIterators { + if sub.NextResult() { + return true + } + } + return false +} + +// Perform and-specific cleanup, of which there currently is none. +func (it *And) cleanUp() {} + +// Close this iterator, and, by extension, close the subiterators. 
+// Close should be idempotent, and it follows that if it's subiterators +// follow this contract, the And follows the contract. +func (it *And) Close() { + it.cleanUp() + it.primaryIt.Close() + for _, sub := range it.internalIterators { + sub.Close() + } +} + +// Register this as an "and" iterator. +func (it *And) Type() string { return "and" } diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go new file mode 100644 index 0000000..304607b --- /dev/null +++ b/graph/iterator/and_iterator_optimize.go @@ -0,0 +1,317 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +import ( + "sort" + + "github.com/google/cayley/graph" +) + +// Perhaps the most tricky file in this entire module. Really a method on the +// And, but important enough to deserve its own file. +// +// Calling Optimize() on an And iterator, like any iterator, requires that we +// preserve the underlying meaning. However, the And has many choices, namely, +// which one of it's subiterators will be the branch that does the Next()ing, +// and which ordering of the remaining iterators is the most efficient. In +// short, this is where a lot of the query optimization happens, and there are +// many wins to be had here, as well as many bad bugs. The worst class of bug +// changes the meaning of the query. The second worst class makes things really +// slow. 
+// +// The good news is this: If Optimize() is never called (turned off, perhaps) we can +// be sure the results are as good as the query language called for. +// +// In short, tread lightly. + +// Optimizes the And, by picking the most efficient way to Next() and +// Check() its subiterators. For SQL fans, this is equivalent to JOIN. +func (it *And) Optimize() (graph.Iterator, bool) { + // First, let's get the slice of iterators, in order (first one is Next()ed, + // the rest are Check()ed) + old := it.GetSubIterators() + + // And call Optimize() on our subtree, replacing each one in the order we + // found them. it_list is the newly optimized versions of these, and changed + // is another list, of only the ones that have returned replacements and + // changed. + its := optimizeSubIterators(old) + + // Close the replaced iterators (they ought to close themselves, but Close() + // is idempotent, so this just protects against any machinations). + closeIteratorList(old, nil) + + // If we can find only one subiterator which is equivalent to this whole and, + // we can replace the And... + out := it.optimizeReplacement(its) + if out != nil { + // ...Move the tags to the replacement... + moveTagsTo(out, it) + // ...Close everyone except `out`, our replacement... + closeIteratorList(its, out) + // ...And return it. + return out, true + } + + // And now, without changing any of the iterators, we reorder them. it_list is + // now a permutation of itself, but the contents are unchanged. + its = optimizeOrder(its) + + // Okay! At this point we have an optimized order. + + // The easiest thing to do at this point is merely to create a new And iterator + // and replace ourselves with our (reordered, optimized) clone. + newAnd := NewAnd() + + // Add the subiterators in order. + for _, sub := range its { + newAnd.AddSubIterator(sub) + } + + // Move the tags hanging on us (like any good replacement). 
+ newAnd.CopyTagsFrom(it) + + newAnd.optimizeCheck() + + // And close ourselves but not our subiterators -- some may still be alive in + // the new And (they were unchanged upon calling Optimize() on them, at the + // start). + it.cleanUp() + return newAnd, true +} + +// Closes a list of iterators, except the one passed in `except`. Closes all +// of the iterators in the list if `except` is nil. +func closeIteratorList(its []graph.Iterator, except graph.Iterator) { + for _, it := range its { + if it != except { + it.Close() + } + } +} + +// Find if there is a single subiterator which is a valid replacement for this +// And. +func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator { + // If we were created with no SubIterators, we're as good as Null. + if len(its) == 0 { + return &Null{} + } + if len(its) == 1 { + // When there's only one iterator, there's only one choice. + return its[0] + } + // If any of our subiterators, post-optimization, are also Null, then + // there's no point in continuing the branch, we will have no results + // and we are null as well. + if hasAnyNullIterators(its) { + return &Null{} + } + + // If we have one useful iterator, use that. + it := hasOneUsefulIterator(its) + if it != nil { + return it + } + return nil +} + +// optimizeOrder(l) takes a list and returns a list, containing the same contents +// but with a new ordering, however it wishes. +func optimizeOrder(its []graph.Iterator) []graph.Iterator { + var ( + // bad contains iterators that can't be (efficiently) nexted, such as + // "optional" or "not". Separate them out and tack them on at the end. + out, bad []graph.Iterator + best graph.Iterator + bestCost = int64(1 << 62) + ) + + // Find the iterator with the projected "best" total cost. + // Total cost is defined as The Next()ed iterator's cost to Next() out + // all of it's contents, and to Check() each of those against everyone + // else. 
+ for _, it := range its { + if !it.Nextable() { + bad = append(bad, it) + continue + } + rootStats := it.GetStats() + cost := rootStats.NextCost + for _, f := range its { + if !f.Nextable() { + continue + } + if f == it { + continue + } + stats := f.GetStats() + cost += stats.CheckCost + } + cost *= rootStats.Size + if cost < bestCost { + best = it + bestCost = cost + } + } + + // TODO(barakmich): Optimization of order need not stop here. Picking a smart + // Check() order based on probability of getting a false Check() first is + // useful (fail faster). + + // Put the best iterator (the one we wish to Next()) at the front... + out = append(out, best) + + // ... push everyone else after... + for _, it := range its { + if !it.Nextable() { + continue + } + if it != best { + out = append(out, it) + } + } + + // ...and finally, the difficult children on the end. + return append(out, bad...) +} + +type byCost []graph.Iterator + +func (c byCost) Len() int { return len(c) } +func (c byCost) Less(i, j int) bool { return c[i].GetStats().CheckCost < c[j].GetStats().CheckCost } +func (c byCost) Swap(i, j int) { c[i], c[j] = c[j], c[i] } + +// optimizeCheck(l) creates an alternate check list, containing the same contents +// but with a new ordering, however it wishes. +func (it *And) optimizeCheck() { + // GetSubIterators allocates, so this is currently safe. + // TODO(kortschak) Reuse it.checkList if possible. + // This involves providing GetSubIterators with a slice to fill. + // Generally this is a worthwhile thing to do in other places as well. + it.checkList = it.GetSubIterators() + sort.Sort(byCost(it.checkList)) +} + +// If we're replacing ourselves by a single iterator, we need to grab the +// result tags from the iterators that, while still valid and would hold +// the same values as this and, are not going to stay. +// getSubTags() returns a map of the tags for all the subiterators. 
+func (it *And) getSubTags() map[string]struct{} { + tags := make(map[string]struct{}) + for _, sub := range it.GetSubIterators() { + for _, tag := range sub.Tags() { + tags[tag] = struct{}{} + } + } + for _, tag := range it.Tags() { + tags[tag] = struct{}{} + } + return tags +} + +// moveTagsTo() gets the tags for all of the src's subiterators and the +// src itself, and moves them to dst. +func moveTagsTo(dst graph.Iterator, src *And) { + tags := src.getSubTags() + for _, tag := range dst.Tags() { + if _, ok := tags[tag]; ok { + delete(tags, tag) + } + } + for k := range tags { + dst.AddTag(k) + } +} + +// optimizeSubIterators(l) takes a list of iterators and calls Optimize() on all +// of them. It returns two lists -- the first contains the same list as l, where +// any replacements are made by Optimize() and the second contains the originals +// which were replaced. +func optimizeSubIterators(its []graph.Iterator) []graph.Iterator { + var optIts []graph.Iterator + for _, it := range its { + o, changed := it.Optimize() + if changed { + optIts = append(optIts, o) + } else { + optIts = append(optIts, it.Clone()) + } + } + return optIts +} + +// Check a list of iterators for any Null iterators. +func hasAnyNullIterators(its []graph.Iterator) bool { + for _, it := range its { + if it.Type() == "null" { + return true + } + } + return false +} + +// There are two "not-useful" iterators -- namely "null" which returns +// nothing, and "all" which returns everything. Particularly, we want +// to see if we're intersecting with a bunch of "all" iterators, and, +// if we are, then we have only one useful iterator. +func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator { + usefulCount := 0 + var usefulIt graph.Iterator + for _, it := range its { + switch it.Type() { + case "null", "all": + continue + case "optional": + // Optional is weird -- it's not useful, but we can't optimize + // away from it. Therefore, we skip this optimization + // if we see one. 
+ return nil + default: + usefulCount++ + usefulIt = it + } + } + + if usefulCount == 1 { + return usefulIt + } + return nil +} + +// and.GetStats() lives here in and-iterator-optimize.go because it may +// in the future return different statistics based on how it is optimized. +// For now, however, it's pretty static. +func (it *And) GetStats() *graph.IteratorStats { + primaryStats := it.primaryIt.GetStats() + CheckCost := primaryStats.CheckCost + NextCost := primaryStats.NextCost + Size := primaryStats.Size + for _, sub := range it.internalIterators { + stats := sub.GetStats() + NextCost += stats.CheckCost + CheckCost += stats.CheckCost + if Size > stats.Size { + Size = stats.Size + } + } + return &graph.IteratorStats{ + CheckCost: CheckCost, + NextCost: NextCost, + Size: Size, + } + +} diff --git a/graph/iterator/and_iterator_optimize_test.go b/graph/iterator/and_iterator_optimize_test.go new file mode 100644 index 0000000..b57f02f --- /dev/null +++ b/graph/iterator/and_iterator_optimize_test.go @@ -0,0 +1,110 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Tests relating to methods in and-iterator-optimize. Many are pretty simplistic, but +// nonetheless cover a lot of basic cases. 
+ +import ( + "reflect" + "sort" + "testing" +) + +func TestIteratorPromotion(t *testing.T) { + all := NewInt64(1, 3) + fixed := newFixed() + fixed.AddValue(3) + a := NewAnd() + a.AddSubIterator(all) + a.AddSubIterator(fixed) + all.AddTag("a") + fixed.AddTag("b") + a.AddTag("c") + newIt, changed := a.Optimize() + if !changed { + t.Error("Iterator didn't optimize") + } + if newIt.Type() != "fixed" { + t.Error("Expected fixed iterator") + } + tagsExpected := []string{"a", "b", "c"} + tags := newIt.Tags() + sort.Strings(tags) + if !reflect.DeepEqual(tags, tagsExpected) { + t.Fatal("Tags don't match") + } +} + +func TestNullIteratorAnd(t *testing.T) { + all := NewInt64(1, 3) + null := NewNull() + a := NewAnd() + a.AddSubIterator(all) + a.AddSubIterator(null) + newIt, changed := a.Optimize() + if !changed { + t.Error("Didn't change") + } + if newIt.Type() != "null" { + t.Error("Expected null iterator, got ", newIt.Type()) + } +} + +func TestReorderWithTag(t *testing.T) { + all := NewInt64(100, 300) + all.AddTag("good") + all2 := NewInt64(1, 30000) + all2.AddTag("slow") + a := NewAnd() + // Make all2 the default iterator + a.AddSubIterator(all2) + a.AddSubIterator(all) + + newIt, changed := a.Optimize() + if !changed { + t.Error("Expected new iterator") + } + expectedTags := []string{"good", "slow"} + tagsOut := make([]string, 0) + for _, sub := range newIt.GetSubIterators() { + for _, x := range sub.Tags() { + tagsOut = append(tagsOut, x) + } + } + if !reflect.DeepEqual(expectedTags, tagsOut) { + t.Fatal("Tags don't match") + } +} + +func TestAndStatistics(t *testing.T) { + all := NewInt64(100, 300) + all.AddTag("good") + all2 := NewInt64(1, 30000) + all2.AddTag("slow") + a := NewAnd() + // Make all2 the default iterator + a.AddSubIterator(all2) + a.AddSubIterator(all) + stats1 := a.GetStats() + newIt, changed := a.Optimize() + if !changed { + t.Error("Didn't optimize") + } + stats2 := newIt.GetStats() + if stats2.NextCost > stats1.NextCost { + t.Error("And didn't 
optimize. Next cost old ", stats1.NextCost, "and new ", stats2.NextCost) + } +} diff --git a/graph/iterator/and_iterator_test.go b/graph/iterator/and_iterator_test.go new file mode 100644 index 0000000..0a531bf --- /dev/null +++ b/graph/iterator/and_iterator_test.go @@ -0,0 +1,149 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +import ( + "testing" + + "github.com/google/cayley/graph" +) + +// Make sure that tags work on the And. +func TestTag(t *testing.T) { + fix1 := newFixed() + fix1.AddValue(234) + fix1.AddTag("foo") + and := NewAnd() + and.AddSubIterator(fix1) + and.AddTag("bar") + out := fix1.Tags() + if len(out) != 1 { + t.Errorf("Expected length 1, got %d", len(out)) + } + if out[0] != "foo" { + t.Errorf("Cannot get tag back, got %s", out[0]) + } + + val, ok := and.Next() + if !ok { + t.Errorf("And did not next") + } + if val != 234 { + t.Errorf("Unexpected value") + } + tags := make(map[string]graph.TSVal) + and.TagResults(&tags) + if tags["bar"] != 234 { + t.Errorf("no bar tag") + } + if tags["foo"] != 234 { + t.Errorf("no foo tag") + } +} + +// Do a simple itersection of fixed values. 
+func TestAndAndFixedIterators(t *testing.T) { + fix1 := newFixed() + fix1.AddValue(1) + fix1.AddValue(2) + fix1.AddValue(3) + fix1.AddValue(4) + fix2 := newFixed() + fix2.AddValue(3) + fix2.AddValue(4) + fix2.AddValue(5) + and := NewAnd() + and.AddSubIterator(fix1) + and.AddSubIterator(fix2) + // Should be as big as smallest subiterator + size, accurate := and.Size() + if size != 3 { + t.Error("Incorrect size") + } + if !accurate { + t.Error("not accurate") + } + + val, ok := and.Next() + if val != 3 || ok == false { + t.Error("Incorrect first value") + } + + val, ok = and.Next() + if val != 4 || ok == false { + t.Error("Incorrect second value") + } + + val, ok = and.Next() + if ok { + t.Error("Too many values") + } + +} + +// If there's no intersection, the size should still report the same, +// but there should be nothing to Next() +func TestNonOverlappingFixedIterators(t *testing.T) { + fix1 := newFixed() + fix1.AddValue(1) + fix1.AddValue(2) + fix1.AddValue(3) + fix1.AddValue(4) + fix2 := newFixed() + fix2.AddValue(5) + fix2.AddValue(6) + fix2.AddValue(7) + and := NewAnd() + and.AddSubIterator(fix1) + and.AddSubIterator(fix2) + // Should be as big as smallest subiterator + size, accurate := and.Size() + if size != 3 { + t.Error("Incorrect size") + } + if !accurate { + t.Error("not accurate") + } + + _, ok := and.Next() + if ok { + t.Error("Too many values") + } + +} + +func TestAllIterators(t *testing.T) { + all1 := NewInt64(1, 5) + all2 := NewInt64(4, 10) + and := NewAnd() + and.AddSubIterator(all2) + and.AddSubIterator(all1) + + val, ok := and.Next() + if val.(int64) != 4 || ok == false { + t.Error("Incorrect first value") + } + + val, ok = and.Next() + if val.(int64) != 5 || ok == false { + t.Error("Incorrect second value") + } + + val, ok = and.Next() + if ok { + t.Error("Too many values") + } + +} diff --git a/graph/iterator/fixed_iterator.go b/graph/iterator/fixed_iterator.go new file mode 100644 index 0000000..08cb5f1 --- /dev/null +++ 
b/graph/iterator/fixed_iterator.go @@ -0,0 +1,157 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Defines one of the base iterators, the Fixed iterator. A fixed iterator is quite simple; it +// contains an explicit fixed array of values. +// +// A fixed iterator requires an Equality function to be passed to it, by reason that graph.TSVal, the +// opaque Triple store value, may not answer to ==. + +import ( + "fmt" + "strings" + + "github.com/google/cayley/graph" +) + +// A Fixed iterator consists of it's values, an index (where it is in the process of Next()ing) and +// an equality function. +type Fixed struct { + Base + values []graph.TSVal + lastIndex int + cmp Equality +} + +// Define the signature of an equality function. +type Equality func(a, b graph.TSVal) bool + +// Define an equality function of purely ==, which works for native types. +func BasicEquality(a, b graph.TSVal) bool { + if a == b { + return true + } + return false +} + +// Creates a new Fixed iterator based around == equality. +func newFixed() *Fixed { + return NewFixedIteratorWithCompare(BasicEquality) +} + +// Creates a new Fixed iterator with a custom comparitor. 
+func NewFixedIteratorWithCompare(compareFn Equality) *Fixed { + var it Fixed + BaseInit(&it.Base) + it.values = make([]graph.TSVal, 0, 20) + it.lastIndex = 0 + it.cmp = compareFn + return &it +} + +func (it *Fixed) Reset() { + it.lastIndex = 0 +} + +func (it *Fixed) Close() {} + +func (it *Fixed) Clone() graph.Iterator { + out := NewFixedIteratorWithCompare(it.cmp) + for _, val := range it.values { + out.AddValue(val) + } + out.CopyTagsFrom(it) + return out +} + +// Add a value to the iterator. The array now contains this value. +// TODO(barakmich): This ought to be a set someday, disallowing repeated values. +func (it *Fixed) AddValue(v graph.TSVal) { + it.values = append(it.values, v) +} + +// Print some information about the iterator. +func (it *Fixed) DebugString(indent int) string { + value := "" + if len(it.values) > 0 { + value = fmt.Sprint(it.values[0]) + } + return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", + strings.Repeat(" ", indent), + it.Type(), + it.FixedTags(), + len(it.values), + value, + ) +} + +// Register this iterator as a Fixed iterator. +func (it *Fixed) Type() string { + return "fixed" +} + +// Check if the passed value is equal to one of the values stored in the iterator. +func (it *Fixed) Check(v graph.TSVal) bool { + // Could be optimized by keeping it sorted or using a better datastructure. + // However, for fixed iterators, which are by definition kind of tiny, this + // isn't a big issue. + CheckLogIn(it, v) + for _, x := range it.values { + if it.cmp(x, v) { + it.Last = x + return CheckLogOut(it, v, true) + } + } + return CheckLogOut(it, v, false) +} + +// Return the next stored value from the iterator. +func (it *Fixed) Next() (graph.TSVal, bool) { + NextLogIn(it) + if it.lastIndex == len(it.values) { + return NextLogOut(it, nil, false) + } + out := it.values[it.lastIndex] + it.Last = out + it.lastIndex++ + return NextLogOut(it, out, true) +} + +// Optimize() for a Fixed iterator is simple. 
Returns a Null iterator if it's empty +// (so that other iterators upstream can treat this as null) or there is no +// optimization. +func (it *Fixed) Optimize() (graph.Iterator, bool) { + if len(it.values) == 1 && it.values[0] == nil { + return &Null{}, true + } + + return it, false +} + +// Size is the number of values stored. +func (it *Fixed) Size() (int64, bool) { + return int64(len(it.values)), true +} + +// As we right now have to scan the entire list, Next and Check are linear with the +// size. However, a better data structure could remove these limits. +func (it *Fixed) GetStats() *graph.IteratorStats { + return &graph.IteratorStats{ + CheckCost: int64(len(it.values)), + NextCost: int64(len(it.values)), + Size: int64(len(it.values)), + } +} diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go new file mode 100644 index 0000000..26cce56 --- /dev/null +++ b/graph/iterator/hasa_iterator.go @@ -0,0 +1,223 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Defines one of the base iterators, the HasA iterator. The HasA takes a +// subiterator of links, and acts as an iterator of nodes in the given +// direction. The name comes from the idea that a "link HasA subject" or a "link +// HasA predicate". +// +// HasA is weird in that it may return the same value twice if on the Next() +// path. 
That's okay -- in reality, it can be viewed as returning the value for +// a new triple, but to make logic much simpler, here we have the HasA. +// +// Likewise, it's important to think about Check()ing a HasA. When given a +// value to check, it means "Check all predicates that have this value for your +// direction against the subiterator." This would imply that there's more than +// one possibility for the same Check()ed value. While we could return the +// number of options, it's simpler to return one, and then call NextResult() +// enough times to enumerate the options. (In fact, one could argue that the +// raison d'etre for NextResult() is this iterator). +// +// Alternatively, can be seen as the dual of the LinksTo iterator. + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" +) + +// A HasA consists of a reference back to the graph.TripleStore that it references, +// a primary subiterator, a direction in which the triples for that subiterator point, +// and a temporary holder for the iterator generated on Check(). +type HasA struct { + Base + ts graph.TripleStore + primaryIt graph.Iterator + dir graph.Direction + resultIt graph.Iterator +} + +// Construct a new HasA iterator, given the triple subiterator, and the triple +// direction for which it stands. +func NewHasA(ts graph.TripleStore, subIt graph.Iterator, d graph.Direction) *HasA { + var hasa HasA + BaseInit(&hasa.Base) + hasa.ts = ts + hasa.primaryIt = subIt + hasa.dir = d + return &hasa +} + +// Return our sole subiterator. +func (it *HasA) GetSubIterators() []graph.Iterator { + return []graph.Iterator{it.primaryIt} +} + +func (it *HasA) Reset() { + it.primaryIt.Reset() + if it.resultIt != nil { + it.resultIt.Close() + } +} + +func (it *HasA) Clone() graph.Iterator { + out := NewHasA(it.ts, it.primaryIt.Clone(), it.dir) + out.CopyTagsFrom(it) + return out +} + +// Direction accessor. 
+func (it *HasA) Direction() graph.Direction { return it.dir } + +// Pass the Optimize() call along to the subiterator. If it becomes Null, +// then the HasA becomes Null (there are no triples that have any directions). +func (it *HasA) Optimize() (graph.Iterator, bool) { + newPrimary, changed := it.primaryIt.Optimize() + if changed { + it.primaryIt = newPrimary + if it.primaryIt.Type() == "null" { + return it.primaryIt, true + } + } + return it, false +} + +// Pass the TagResults down the chain. +func (it *HasA) TagResults(out *map[string]graph.TSVal) { + it.Base.TagResults(out) + it.primaryIt.TagResults(out) +} + +// DEPRECATED Return results in a ResultTree. +func (it *HasA) GetResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.LastResult()) + tree.AddSubtree(it.primaryIt.GetResultTree()) + return tree +} + +// Print some information about this iterator. +func (it *HasA) DebugString(indent int) string { + var tags string + for _, k := range it.Tags() { + tags += fmt.Sprintf("%s;", k) + } + return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", strings.Repeat(" ", indent), it.Type(), it.GetUid(), tags, it.dir, it.primaryIt.DebugString(indent+4)) +} + +// Check a value against our internal iterator. In order to do this, we must first open a new +// iterator of "triples that have `val` in our direction", given to us by the triple store, +// and then Next() values out of that iterator and Check() them against our subiterator. 
+func (it *HasA) Check(val graph.TSVal) bool { + CheckLogIn(it, val) + if glog.V(4) { + glog.V(4).Infoln("Id is", it.ts.GetNameFor(val)) + } + // TODO(barakmich): Optimize this + if it.resultIt != nil { + it.resultIt.Close() + } + it.resultIt = it.ts.GetTripleIterator(it.dir, val) + return CheckLogOut(it, val, it.GetCheckResult()) +} + +// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the +// result iterator (a triple iterator based on the last checked value) and returns true if +// another match is made. +func (it *HasA) GetCheckResult() bool { + for { + linkVal, ok := it.resultIt.Next() + if !ok { + break + } + if glog.V(4) { + glog.V(4).Infoln("Triple is", it.ts.GetTriple(linkVal)) + } + if it.primaryIt.Check(linkVal) { + it.Last = it.ts.GetTripleDirection(linkVal, it.dir) + return true + } + } + return false +} + +// Get the next result that matches this branch. +func (it *HasA) NextResult() bool { + // Order here is important. If the subiterator has a NextResult, then we + // need do nothing -- there is a next result, and we shouldn't move forward. + // However, we then need to get the next result from our last Check(). + // + // The upshot is, the end of NextResult() bubbles up from the bottom of the + // iterator tree up, and we need to respect that. + if it.primaryIt.NextResult() { + return true + } + return it.GetCheckResult() +} + +// Get the next result from this iterator. This is simpler than Check. We have a +// subiterator we can get a value from, and we can take that resultant triple, +// pull our direction out of it, and return that. 
+func (it *HasA) Next() (graph.TSVal, bool) { + NextLogIn(it) + if it.resultIt != nil { + it.resultIt.Close() + } + it.resultIt = &Null{} + + tID, ok := it.primaryIt.Next() + if !ok { + return NextLogOut(it, 0, false) + } + name := it.ts.GetTriple(tID).Get(it.dir) + val := it.ts.GetIdFor(name) + it.Last = val + return NextLogOut(it, val, true) +} + +// GetStats() returns the statistics on the HasA iterator. This is curious. Next +// cost is easy, it's an extra call or so on top of the subiterator Next cost. +// CheckCost involves going to the graph.TripleStore, iterating out values, and hoping +// one sticks -- potentially expensive, depending on fanout. Size, however, is +// potentially smaller. we know at worst it's the size of the subiterator, but +// if there are many repeated values, it could be much smaller in totality. +func (it *HasA) GetStats() *graph.IteratorStats { + subitStats := it.primaryIt.GetStats() + // TODO(barakmich): These should really come from the triplestore itself + // and be optimized. + faninFactor := int64(1) + fanoutFactor := int64(30) + nextConstant := int64(2) + tripleConstant := int64(1) + return &graph.IteratorStats{ + NextCost: tripleConstant + subitStats.NextCost, + CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost, + Size: faninFactor * subitStats.Size, + } +} + +// Close the subiterator, the result iterator (if any) and the HasA. +func (it *HasA) Close() { + if it.resultIt != nil { + it.resultIt.Close() + } + it.primaryIt.Close() +} + +// Register this iterator as a HasA. +func (it *HasA) Type() string { return "hasa" } diff --git a/graph/iterator/iterator.go b/graph/iterator/iterator.go new file mode 100644 index 0000000..c015a64 --- /dev/null +++ b/graph/iterator/iterator.go @@ -0,0 +1,223 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Define the general iterator interface, as well as the Base which all +// iterators can "inherit" from to get default iterator functionality. + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" +) + +var iterator_n int = 0 + +// The Base iterator is the iterator other iterators inherit from to get some +// default functionality. +type Base struct { + Last graph.TSVal + tags []string + fixedTags map[string]graph.TSVal + nextable bool + uid int +} + +// Called by subclases. +func BaseInit(it *Base) { + // Your basic iterator is nextable + it.nextable = true + it.uid = iterator_n + if glog.V(2) { + iterator_n++ + } +} + +func (it *Base) GetUid() int { + return it.uid +} + +// Adds a tag to the iterator. Most iterators don't need to override. +func (it *Base) AddTag(tag string) { + if it.tags == nil { + it.tags = make([]string, 0) + } + it.tags = append(it.tags, tag) +} + +func (it *Base) AddFixedTag(tag string, value graph.TSVal) { + if it.fixedTags == nil { + it.fixedTags = make(map[string]graph.TSVal) + } + it.fixedTags[tag] = value +} + +// Returns the tags. +func (it *Base) Tags() []string { + return it.tags +} + +func (it *Base) FixedTags() map[string]graph.TSVal { + return it.fixedTags +} + +func (it *Base) CopyTagsFrom(other_it graph.Iterator) { + for _, tag := range other_it.Tags() { + it.AddTag(tag) + } + + for k, v := range other_it.FixedTags() { + it.AddFixedTag(k, v) + } + +} + +// Prints a silly debug string. Most classes override. 
+func (it *Base) DebugString(indent int) string { + return fmt.Sprintf("%s(base)", strings.Repeat(" ", indent)) +} + +// Nothing in a base iterator. +func (it *Base) Check(v graph.TSVal) bool { + return false +} + +// Base iterators should never appear in a tree if they are, select against +// them. +func (it *Base) GetStats() *graph.IteratorStats { + return &graph.IteratorStats{100000, 100000, 100000} +} + +// DEPRECATED +func (it *Base) GetResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.LastResult()) + return tree +} + +// Nothing in a base iterator. +func (it *Base) Next() (graph.TSVal, bool) { + return nil, false +} + +func (it *Base) NextResult() bool { + return false +} + +// Returns the last result of an iterator. +func (it *Base) LastResult() graph.TSVal { + return it.Last +} + +// If you're empty and you know it, clap your hands. +func (it *Base) Size() (int64, bool) { + return 0, true +} + +// No subiterators. Only those with subiterators need to do anything here. +func (it *Base) GetSubIterators() []graph.Iterator { + return nil +} + +// Accessor +func (it *Base) Nextable() bool { return it.nextable } + +// Fill the map based on the tags assigned to this iterator. Default +// functionality works well for most iterators. +func (it *Base) TagResults(out_map *map[string]graph.TSVal) { + for _, tag := range it.Tags() { + (*out_map)[tag] = it.LastResult() + } + + for tag, value := range it.FixedTags() { + (*out_map)[tag] = value + } +} + +// Nothing to clean up. +// func (it *Base) Close() {} + +func (it *Null) Close() {} + +func (it *Base) Reset() {} + +// Here we define the simplest base iterator -- the Null iterator. It contains nothing. +// It is the empty set. Often times, queries that contain one of these match nothing, +// so it's important to give it a special iterator. +type Null struct { + Base +} + +// Fairly useless New function. 
+func NewNull() *Null { + return &Null{} +} + +func (it *Null) Clone() graph.Iterator { return NewNull() } + +// Name the null iterator. +func (it *Null) Type() string { return "null" } + +// A good iterator will close itself when it returns true. +// Null has nothing it needs to do. +func (it *Null) Optimize() (graph.Iterator, bool) { return it, false } + +// Print the null iterator. +func (it *Null) DebugString(indent int) string { + return strings.Repeat(" ", indent) + "(null)" +} + +// A null iterator costs nothing. Use it! +func (it *Null) GetStats() *graph.IteratorStats { + return &graph.IteratorStats{} +} + +// Utility logging functions for when an iterator gets called Next upon, or Check upon, as +// well as what they return. Highly useful for tracing the execution path of a query. +func CheckLogIn(it graph.Iterator, val graph.TSVal) { + if glog.V(4) { + glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type()), it.GetUid(), val) + } +} + +func CheckLogOut(it graph.Iterator, val graph.TSVal, good bool) bool { + if glog.V(4) { + if good { + glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type()), it.GetUid(), val) + } else { + glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type()), it.GetUid(), val) + } + } + return good +} + +func NextLogIn(it graph.Iterator) { + if glog.V(4) { + glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type()), it.GetUid()) + } +} + +func NextLogOut(it graph.Iterator, val graph.TSVal, ok bool) (graph.TSVal, bool) { + if glog.V(4) { + if ok { + glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type()), it.GetUid(), val) + } else { + glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type()), it.GetUid()) + } + } + return val, ok +} diff --git a/graph/iterator/linksto_iterator.go b/graph/iterator/linksto_iterator.go new file mode 100644 index 0000000..55bc475 --- /dev/null +++ b/graph/iterator/linksto_iterator.go @@ -0,0 +1,183 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
// A LinksTo has a reference back to the graph.TripleStore (to create the
// iterators for each node), the subiterator of nodes, and the direction the
// iterator comes from. nextIt is the temporary iterator held per result of
// primaryIt.
type LinksTo struct {
	Base
	// ts is the triple store used to look up links and their directions.
	ts graph.TripleStore
	// primaryIt is the subiterator of nodes being linked to.
	primaryIt graph.Iterator
	// dir is the direction in which a link must point at a node.
	dir graph.Direction
	// nextIt iterates over the links for the current node of primaryIt.
	nextIt graph.Iterator
}
+func NewLinksTo(ts graph.TripleStore, it graph.Iterator, d graph.Direction) *LinksTo { + var lto LinksTo + BaseInit(<o.Base) + lto.ts = ts + lto.primaryIt = it + lto.dir = d + lto.nextIt = &Null{} + return <o +} + +func (it *LinksTo) Reset() { + it.primaryIt.Reset() + if it.nextIt != nil { + it.nextIt.Close() + } + it.nextIt = &Null{} +} + +func (it *LinksTo) Clone() graph.Iterator { + out := NewLinksTo(it.ts, it.primaryIt.Clone(), it.dir) + out.CopyTagsFrom(it) + return out +} + +// Return the direction under consideration. +func (it *LinksTo) Direction() graph.Direction { return it.dir } + +// Tag these results, and our subiterator's results. +func (it *LinksTo) TagResults(out *map[string]graph.TSVal) { + it.Base.TagResults(out) + it.primaryIt.TagResults(out) +} + +// DEPRECATED +func (it *LinksTo) GetResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.LastResult()) + tree.AddSubtree(it.primaryIt.GetResultTree()) + return tree +} + +// Print the iterator. +func (it *LinksTo) DebugString(indent int) string { + return fmt.Sprintf("%s(%s %d direction:%s\n%s)", + strings.Repeat(" ", indent), + it.Type(), it.GetUid(), it.dir, it.primaryIt.DebugString(indent+4)) +} + +// If it checks in the right direction for the subiterator, it is a valid link +// for the LinksTo. +func (it *LinksTo) Check(val graph.TSVal) bool { + CheckLogIn(it, val) + node := it.ts.GetTripleDirection(val, it.dir) + if it.primaryIt.Check(node) { + it.Last = val + return CheckLogOut(it, val, true) + } + return CheckLogOut(it, val, false) +} + +// Return a list containing only our subiterator. +func (it *LinksTo) GetSubIterators() []graph.Iterator { + return []graph.Iterator{it.primaryIt} +} + +// Optimize the LinksTo, by replacing it if it can be. 
+func (it *LinksTo) Optimize() (graph.Iterator, bool) { + newPrimary, changed := it.primaryIt.Optimize() + if changed { + it.primaryIt = newPrimary + if it.primaryIt.Type() == "null" { + it.nextIt.Close() + return it.primaryIt, true + } + } + // Ask the graph.TripleStore if we can be replaced. Often times, this is a great + // optimization opportunity (there's a fixed iterator underneath us, for + // example). + newReplacement, hasOne := it.ts.OptimizeIterator(it) + if hasOne { + it.Close() + return newReplacement, true + } + return it, false +} + +// Next()ing a LinksTo operates as described above. +func (it *LinksTo) Next() (graph.TSVal, bool) { + NextLogIn(it) + val, ok := it.nextIt.Next() + if !ok { + // Subiterator is empty, get another one + candidate, ok := it.primaryIt.Next() + if !ok { + // We're out of nodes in our subiterator, so we're done as well. + return NextLogOut(it, 0, false) + } + it.nextIt.Close() + it.nextIt = it.ts.GetTripleIterator(it.dir, candidate) + // Recurse -- return the first in the next set. + return it.Next() + } + it.Last = val + return NextLogOut(it, val, ok) +} + +// Close our subiterators. +func (it *LinksTo) Close() { + it.nextIt.Close() + it.primaryIt.Close() +} + +// We won't ever have a new result, but our subiterators might. +func (it *LinksTo) NextResult() bool { + return it.primaryIt.NextResult() +} + +// Register the LinksTo. +func (it *LinksTo) Type() string { return "linksto" } + +// Return a guess as to how big or costly it is to next the iterator. 
+func (it *LinksTo) GetStats() *graph.IteratorStats { + subitStats := it.primaryIt.GetStats() + // TODO(barakmich): These should really come from the triplestore itself + fanoutFactor := int64(20) + checkConstant := int64(1) + nextConstant := int64(2) + return &graph.IteratorStats{ + NextCost: nextConstant + subitStats.NextCost, + CheckCost: checkConstant + subitStats.CheckCost, + Size: fanoutFactor * subitStats.Size, + } +} diff --git a/graph/iterator/linksto_iterator_test.go b/graph/iterator/linksto_iterator_test.go new file mode 100644 index 0000000..a799473 --- /dev/null +++ b/graph/iterator/linksto_iterator_test.go @@ -0,0 +1,39 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package iterator + +import ( + "testing" + + "github.com/google/cayley/graph" +) + +func TestLinksTo(t *testing.T) { + ts := new(TestTripleStore) + tsFixed := newFixed() + tsFixed.AddValue(2) + ts.On("GetIdFor", "cool").Return(1) + ts.On("GetTripleIterator", graph.Object, 1).Return(tsFixed) + fixed := newFixed() + fixed.AddValue(ts.GetIdFor("cool")) + lto := NewLinksTo(ts, fixed, graph.Object) + val, ok := lto.Next() + if !ok { + t.Error("At least one triple matches the fixed object") + } + if val != 2 { + t.Errorf("Triple index 2, such as %s, should match %s", ts.GetTriple(2), ts.GetTriple(val)) + } +} diff --git a/graph/iterator/mock_ts_test.go b/graph/iterator/mock_ts_test.go new file mode 100644 index 0000000..1aee954 --- /dev/null +++ b/graph/iterator/mock_ts_test.go @@ -0,0 +1,60 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// A quickly mocked version of the TripleStore interface, for use in tests. +// Can better used Mock.Called but will fill in as needed. 
+ +import ( + "github.com/stretchrcom/testify/mock" + + "github.com/google/cayley/graph" +) + +type TestTripleStore struct { + mock.Mock +} + +func (ts *TestTripleStore) GetIdFor(s string) graph.TSVal { + args := ts.Mock.Called(s) + return args.Get(0) +} +func (ts *TestTripleStore) AddTriple(*graph.Triple) {} +func (ts *TestTripleStore) AddTripleSet([]*graph.Triple) {} +func (ts *TestTripleStore) GetTriple(graph.TSVal) *graph.Triple { return &graph.Triple{} } +func (ts *TestTripleStore) GetTripleIterator(d graph.Direction, i graph.TSVal) graph.Iterator { + args := ts.Mock.Called(d, i) + return args.Get(0).(graph.Iterator) +} +func (ts *TestTripleStore) GetNodesAllIterator() graph.Iterator { return &Null{} } +func (ts *TestTripleStore) GetTriplesAllIterator() graph.Iterator { return &Null{} } +func (ts *TestTripleStore) GetIteratorByString(string, string, string) graph.Iterator { + return &Null{} +} +func (ts *TestTripleStore) GetNameFor(v graph.TSVal) string { + args := ts.Mock.Called(v) + return args.Get(0).(string) +} +func (ts *TestTripleStore) Size() int64 { return 0 } +func (ts *TestTripleStore) DebugPrint() {} +func (ts *TestTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + return &Null{}, false +} +func (ts *TestTripleStore) FixedIterator() graph.FixedIterator { + return NewFixedIteratorWithCompare(BasicEquality) +} +func (ts *TestTripleStore) Close() {} +func (ts *TestTripleStore) GetTripleDirection(graph.TSVal, graph.Direction) graph.TSVal { return 0 } +func (ts *TestTripleStore) RemoveTriple(t *graph.Triple) {} diff --git a/graph/iterator/optional_iterator.go b/graph/iterator/optional_iterator.go new file mode 100644 index 0000000..8d7ae97 --- /dev/null +++ b/graph/iterator/optional_iterator.go @@ -0,0 +1,137 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// "Optional" is kind of odd. It's not an iterator in the strictest sense, but +// it's easier to implement as an iterator. +// +// Consider what it means. It means that we have a subconstraint which we do +// not want to constrain the query -- we just want it to return the matching +// subgraph if one matches at all. By analogy to regular expressions, it is the +// '?' operator. +// +// If it were a proper iterator of its own (and indeed, a reasonable refactor +// of this iterator would be to make it such) it would contain an all iterator +// -- all things in the graph. It matches everything (as does the regex "(a)?") + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" +) + +// An optional iterator has the subconstraint iterator we wish to be optional +// and whether the last check we received was true or false. +type Optional struct { + Base + subIt graph.Iterator + lastCheck bool +} + +// Creates a new optional iterator. +func NewOptional(it graph.Iterator) *Optional { + var o Optional + BaseInit(&o.Base) + o.nextable = false + o.subIt = it + return &o +} + +func (it *Optional) Reset() { + it.subIt.Reset() + it.lastCheck = false +} + +func (it *Optional) Close() { + it.subIt.Close() +} + +func (it *Optional) Clone() graph.Iterator { + out := NewOptional(it.subIt.Clone()) + out.CopyTagsFrom(it) + return out +} + +// Nexting the iterator is unsupported -- error and return an empty set. 
+// (As above, a reasonable alternative would be to Next() an all iterator) +func (it *Optional) Next() (graph.TSVal, bool) { + glog.Errorln("Nexting an un-nextable iterator") + return nil, false +} + +// An optional iterator only has a next result if, (a) last time we checked +// we had any results whatsoever, and (b) there was another subresult in our +// optional subbranch. +func (it *Optional) NextResult() bool { + if it.lastCheck { + return it.subIt.NextResult() + } + return false +} + +// Check() is the real hack of this iterator. It always returns true, regardless +// of whether the subiterator matched. But we keep track of whether the subiterator +// matched for results purposes. +func (it *Optional) Check(val graph.TSVal) bool { + checked := it.subIt.Check(val) + it.lastCheck = checked + it.Last = val + return true +} + +// If we failed the check, then the subiterator should not contribute to the result +// set. Otherwise, go ahead and tag it. +func (it *Optional) TagResults(out *map[string]graph.TSVal) { + if it.lastCheck == false { + return + } + it.subIt.TagResults(out) +} + +// Registers the optional iterator. +func (it *Optional) Type() string { return "optional" } + +// Prints the optional and it's subiterator. +func (it *Optional) DebugString(indent int) string { + return fmt.Sprintf("%s(%s tags:%s\n%s)", + strings.Repeat(" ", indent), + it.Type(), + it.Tags(), + it.subIt.DebugString(indent+4)) +} + +// There's nothing to optimize for an optional. Optimize the subiterator and +// potentially replace it. +func (it *Optional) Optimize() (graph.Iterator, bool) { + newSub, changed := it.subIt.Optimize() + if changed { + it.subIt.Close() + it.subIt = newSub + } + return it, false +} + +// We're only as expensive as our subiterator. Except, we can't be nexted. 
+func (it *Optional) GetStats() *graph.IteratorStats { + subStats := it.subIt.GetStats() + return &graph.IteratorStats{ + CheckCost: subStats.CheckCost, + NextCost: int64(1 << 62), + Size: subStats.Size, + } +} diff --git a/graph/iterator/or_iterator.go b/graph/iterator/or_iterator.go new file mode 100644 index 0000000..b5a269d --- /dev/null +++ b/graph/iterator/or_iterator.go @@ -0,0 +1,284 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// Defines the or and short-circuiting or iterator. Or is the union operator for it's subiterators. +// Short-circuiting-or is a little different. It will return values from the first graph.iterator that returns +// values at all, and then stops. +// +// Never reorders the iterators from the order they arrive. It is either the union or the first one. +// May return the same value twice -- once for each branch. 
+ +import ( + "fmt" + "strings" + + "github.com/google/cayley/graph" +) + +type Or struct { + Base + isShortCircuiting bool + internalIterators []graph.Iterator + itCount int + currentIterator int +} + +func NewOr() *Or { + var or Or + BaseInit(&or.Base) + or.internalIterators = make([]graph.Iterator, 0, 20) + or.isShortCircuiting = false + or.currentIterator = -1 + return &or +} + +func NewShortCircuitOr() *Or { + var or Or + BaseInit(&or.Base) + or.internalIterators = make([]graph.Iterator, 0, 20) + or.isShortCircuiting = true + or.currentIterator = -1 + return &or +} + +// Reset all internal iterators +func (it *Or) Reset() { + for _, sub := range it.internalIterators { + sub.Reset() + } + it.currentIterator = -1 +} + +func (it *Or) Clone() graph.Iterator { + var or *Or + if it.isShortCircuiting { + or = NewShortCircuitOr() + } else { + or = NewOr() + } + for _, sub := range it.internalIterators { + or.AddSubIterator(sub.Clone()) + } + it.CopyTagsFrom(it) + return or +} + +// Returns a list.List of the subiterators, in order. The returned slice must not be modified. +func (it *Or) GetSubIterators() []graph.Iterator { + return it.internalIterators +} + +// Overrides BaseIterator TagResults, as it needs to add it's own results and +// recurse down it's subiterators. +func (it *Or) TagResults(out *map[string]graph.TSVal) { + it.Base.TagResults(out) + it.internalIterators[it.currentIterator].TagResults(out) +} + +// DEPRECATED Returns the ResultTree for this graph.iterator, recurses to it's subiterators. +func (it *Or) GetResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.LastResult()) + for _, sub := range it.internalIterators { + tree.AddSubtree(sub.GetResultTree()) + } + return tree +} + +// Prints information about this graph.iterator. 
+func (it *Or) DebugString(indent int) string { + var total string + for i, sub := range it.internalIterators { + total += strings.Repeat(" ", indent+2) + total += fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4)) + } + var tags string + for _, k := range it.Tags() { + tags += fmt.Sprintf("%s;", k) + } + spaces := strings.Repeat(" ", indent+2) + + return fmt.Sprintf("%s(%s\n%stags:%s\n%sits:\n%s)", + strings.Repeat(" ", indent), + it.Type(), + spaces, + tags, + spaces, + total) +} + +// Add a subiterator to this Or graph.iterator. Order matters. +func (it *Or) AddSubIterator(sub graph.Iterator) { + it.internalIterators = append(it.internalIterators, sub) + it.itCount++ +} + +// Returns the Next value from the Or graph.iterator. Because the Or is the +// union of its subiterators, it must produce from all subiterators -- unless +// it's shortcircuiting, in which case, it's the first one that returns anything. +func (it *Or) Next() (graph.TSVal, bool) { + NextLogIn(it) + var curr graph.TSVal + var exists bool + firstTime := false + for { + if it.currentIterator == -1 { + it.currentIterator = 0 + firstTime = true + } + curIt := it.internalIterators[it.currentIterator] + curr, exists = curIt.Next() + if !exists { + if it.isShortCircuiting && !firstTime { + return NextLogOut(it, nil, false) + } + it.currentIterator++ + if it.currentIterator == it.itCount { + return NextLogOut(it, nil, false) + } + } else { + it.Last = curr + return NextLogOut(it, curr, true) + } + } + panic("Somehow broke out of Next() loop in Or") +} + +// Checks a value against the iterators, in order. +func (it *Or) checkSubIts(val graph.TSVal) bool { + var subIsGood = false + for i, sub := range it.internalIterators { + subIsGood = sub.Check(val) + if subIsGood { + it.currentIterator = i + break + } + } + return subIsGood +} + +// Check a value against the entire graph.iterator, in order. 
+func (it *Or) Check(val graph.TSVal) bool { + CheckLogIn(it, val) + anyGood := it.checkSubIts(val) + if !anyGood { + return CheckLogOut(it, val, false) + } + it.Last = val + return CheckLogOut(it, val, true) +} + +// Returns the approximate size of the Or graph.iterator. Because we're dealing +// with a union, we know that the largest we can be is the sum of all the iterators, +// or in the case of short-circuiting, the longest. +func (it *Or) Size() (int64, bool) { + var val int64 + var b bool + if it.isShortCircuiting { + val = 0 + b = true + for _, sub := range it.internalIterators { + newval, newb := sub.Size() + if val < newval { + val = newval + } + b = newb && b + } + } else { + val = 0 + b = true + for _, sub := range it.internalIterators { + newval, newb := sub.Size() + val += newval + b = newb && b + } + } + return val, b +} + +// An Or has no NextResult of its own -- that is, there are no other values +// which satisfy our previous result that are not the result itself. Our +// subiterators might, however, so just pass the call recursively. In the case of +// shortcircuiting, only allow new results from the currently checked graph.iterator +func (it *Or) NextResult() bool { + if it.currentIterator != -1 { + return it.internalIterators[it.currentIterator].NextResult() + } + return false +} + +// Perform or-specific cleanup, of which there currently is none. +func (it *Or) cleanUp() {} + +// Close this graph.iterator, and, by extension, close the subiterators. +// Close should be idempotent, and it follows that if it's subiterators +// follow this contract, the And follows the contract. +func (it *Or) Close() { + it.cleanUp() + for _, sub := range it.internalIterators { + sub.Close() + } +} + +func (it *Or) Optimize() (graph.Iterator, bool) { + old := it.GetSubIterators() + optIts := optimizeSubIterators(old) + // Close the replaced iterators (they ought to close themselves, but Close() + // is idempotent, so this just protects against any machinations). 
+ closeIteratorList(old, nil) + newOr := NewOr() + newOr.isShortCircuiting = it.isShortCircuiting + + // Add the subiterators in order. + for _, o := range optIts { + newOr.AddSubIterator(o) + } + + // Move the tags hanging on us (like any good replacement). + newOr.CopyTagsFrom(it) + + // And close ourselves but not our subiterators -- some may still be alive in + // the new And (they were unchanged upon calling Optimize() on them, at the + // start). + it.cleanUp() + return newOr, true +} + +func (it *Or) GetStats() *graph.IteratorStats { + CheckCost := int64(0) + NextCost := int64(0) + Size := int64(0) + for _, sub := range it.internalIterators { + stats := sub.GetStats() + NextCost += stats.NextCost + CheckCost += stats.CheckCost + if it.isShortCircuiting { + if Size < stats.Size { + Size = stats.Size + } + } else { + Size += stats.Size + } + } + return &graph.IteratorStats{ + CheckCost: CheckCost, + NextCost: NextCost, + Size: Size, + } + +} + +// Register this as an "or" graph.iterator. +func (it *Or) Type() string { return "or" } diff --git a/graph/iterator/or_iterator_test.go b/graph/iterator/or_iterator_test.go new file mode 100644 index 0000000..76d16d6 --- /dev/null +++ b/graph/iterator/or_iterator_test.go @@ -0,0 +1,145 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +import ( + "testing" + + . 
"github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph" +) + +func extractNumbersFromIterator(it graph.Iterator) []int { + var outputNumbers []int + for { + val, ok := it.Next() + if !ok { + break + } + outputNumbers = append(outputNumbers, val.(int)) + } + return outputNumbers +} + +func TestOrIteratorBasics(t *testing.T) { + var orIt *Or + + Convey("Given an Or Iterator of two fixed iterators", t, func() { + orIt = NewOr() + fixed1 := newFixed() + fixed1.AddValue(1) + fixed1.AddValue(2) + fixed1.AddValue(3) + fixed2 := newFixed() + fixed2.AddValue(3) + fixed2.AddValue(9) + fixed2.AddValue(20) + fixed2.AddValue(21) + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + + Convey("It should guess its size.", func() { + v, _ := orIt.Size() + So(v, ShouldEqual, 7) + }) + + Convey("It should extract all the numbers, potentially twice.", func() { + allNumbers := []int{1, 2, 3, 3, 9, 20, 21} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + Convey("It should check that numbers in either iterator exist.", func() { + So(orIt.Check(2), ShouldEqual, true) + So(orIt.Check(3), ShouldEqual, true) + So(orIt.Check(21), ShouldEqual, true) + }) + + Convey("It should check that numbers not in either iterator are false.", func() { + So(orIt.Check(22), ShouldEqual, false) + So(orIt.Check(5), ShouldEqual, false) + So(orIt.Check(0), ShouldEqual, false) + }) + + }) + +} + +func TestShortCircuitingOrBasics(t *testing.T) { + var orIt *Or + + Convey("Given a short-circuiting Or of two fixed iterators", t, func() { + orIt = NewShortCircuitOr() + fixed1 := newFixed() + fixed1.AddValue(1) + fixed1.AddValue(2) + fixed1.AddValue(3) + fixed2 := newFixed() + fixed2.AddValue(3) + fixed2.AddValue(9) + fixed2.AddValue(20) + fixed2.AddValue(21) 
+ + Convey("It should guess its size.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + v, _ := orIt.Size() + So(v, ShouldEqual, 4) + }) + + Convey("It should extract the first iterators' numbers.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + allNumbers := []int{1, 2, 3} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + Convey("It should check that numbers in either iterator exist.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + So(orIt.Check(2), ShouldEqual, true) + So(orIt.Check(3), ShouldEqual, true) + So(orIt.Check(21), ShouldEqual, true) + So(orIt.Check(22), ShouldEqual, false) + So(orIt.Check(5), ShouldEqual, false) + So(orIt.Check(0), ShouldEqual, false) + + }) + + Convey("It should check that it pulls the second iterator's numbers if the first is empty.", func() { + orIt.AddSubIterator(newFixed()) + orIt.AddSubIterator(fixed2) + allNumbers := []int{3, 9, 20, 21} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + }) + +} diff --git a/graph/iterator/query_shape.go b/graph/iterator/query_shape.go new file mode 100644 index 0000000..b51d781 --- /dev/null +++ b/graph/iterator/query_shape.go @@ -0,0 +1,181 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +import ( + "github.com/google/cayley/graph" +) + +type Node struct { + Id int `json:"id"` + Tags []string `json:"tags,omitempty"` + Values []string `json:"values,omitempty"` + IsLinkNode bool `json:"is_link_node"` + IsFixed bool `json:"is_fixed"` +} + +type Link struct { + Source int `json:"source"` + Target int `json:"target"` + Pred int `json:"type"` + LinkNode int `json:"link_node"` +} + +type queryShape struct { + nodes []Node + links []Link + ts graph.TripleStore + nodeId int + hasaIds []int + hasaDirs []graph.Direction +} + +func OutputQueryShapeForIterator(it graph.Iterator, ts graph.TripleStore, outputMap *map[string]interface{}) { + qs := &queryShape{ + ts: ts, + nodeId: 1, + } + + node := qs.MakeNode(it.Clone()) + qs.AddNode(node) + (*outputMap)["nodes"] = qs.nodes + (*outputMap)["links"] = qs.links +} + +func (qs *queryShape) AddNode(n *Node) { + qs.nodes = append(qs.nodes, *n) +} + +func (qs *queryShape) AddLink(l *Link) { + qs.links = append(qs.links, *l) +} + +func (qs *queryShape) LastHasa() (int, graph.Direction) { + return qs.hasaIds[len(qs.hasaIds)-1], qs.hasaDirs[len(qs.hasaDirs)-1] +} + +func (qs *queryShape) PushHasa(i int, d graph.Direction) { + qs.hasaIds = append(qs.hasaIds, i) + qs.hasaDirs = append(qs.hasaDirs, d) +} + +func (qs *queryShape) RemoveHasa() { + qs.hasaIds = qs.hasaIds[:len(qs.hasaIds)-1] + qs.hasaDirs = qs.hasaDirs[:len(qs.hasaDirs)-1] +} + +func (qs *queryShape) StealNode(left *Node, right *Node) { + for _, v := range right.Values { + left.Values = append(left.Values, v) + } + for _, v 
:= range right.Tags { + left.Tags = append(left.Tags, v) + } + left.IsLinkNode = left.IsLinkNode || right.IsLinkNode + left.IsFixed = left.IsFixed || right.IsFixed + for i, link := range qs.links { + rewrite := false + if link.LinkNode == right.Id { + link.LinkNode = left.Id + rewrite = true + } + if link.Source == right.Id { + link.Source = left.Id + rewrite = true + } + if link.Target == right.Id { + link.Target = left.Id + rewrite = true + } + if rewrite { + qs.links = append(append(qs.links[:i], qs.links[i+1:]...), link) + } + } +} + +func (qs *queryShape) MakeNode(it graph.Iterator) *Node { + n := Node{Id: qs.nodeId} + for _, tag := range it.Tags() { + n.Tags = append(n.Tags, tag) + } + for k, _ := range it.FixedTags() { + n.Tags = append(n.Tags, k) + } + + switch it.Type() { + case "and": + for _, sub := range it.GetSubIterators() { + qs.nodeId++ + newNode := qs.MakeNode(sub) + if sub.Type() != "or" { + qs.StealNode(&n, newNode) + } else { + qs.AddNode(newNode) + qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) + } + } + case "fixed": + n.IsFixed = true + for { + val, more := it.Next() + if !more { + break + } + n.Values = append(n.Values, qs.ts.GetNameFor(val)) + } + case "hasa": + hasa := it.(*HasA) + qs.PushHasa(n.Id, hasa.dir) + qs.nodeId++ + newNode := qs.MakeNode(hasa.primaryIt) + qs.AddNode(newNode) + qs.RemoveHasa() + case "or": + for _, sub := range it.GetSubIterators() { + qs.nodeId++ + newNode := qs.MakeNode(sub) + if sub.Type() == "or" { + qs.StealNode(&n, newNode) + } else { + qs.AddNode(newNode) + qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) + } + } + case "linksto": + n.IsLinkNode = true + lto := it.(*LinksTo) + qs.nodeId++ + newNode := qs.MakeNode(lto.primaryIt) + hasaID, hasaDir := qs.LastHasa() + if (hasaDir == graph.Subject && lto.dir == graph.Object) || + (hasaDir == graph.Object && lto.dir == graph.Subject) { + qs.AddNode(newNode) + if hasaDir == graph.Subject { + qs.AddLink(&Link{hasaID, newNode.Id, 0, n.Id}) + } else { + 
qs.AddLink(&Link{newNode.Id, hasaID, 0, n.Id}) + } + } else if lto.primaryIt.Type() == "fixed" { + qs.StealNode(&n, newNode) + } else { + qs.AddNode(newNode) + } + case "optional": + // Unsupported, for the moment + fallthrough + case "all": + } + return &n +} diff --git a/graph/iterator/query_shape_test.go b/graph/iterator/query_shape_test.go new file mode 100644 index 0000000..ef007e8 --- /dev/null +++ b/graph/iterator/query_shape_test.go @@ -0,0 +1,126 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +import ( + "testing" + + . 
"github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph" +) + +func buildHasaWithTag(ts graph.TripleStore, tag string, target string) *HasA { + fixed_obj := ts.FixedIterator() + fixed_pred := ts.FixedIterator() + fixed_obj.AddValue(ts.GetIdFor(target)) + fixed_pred.AddValue(ts.GetIdFor("status")) + fixed_obj.AddTag(tag) + lto1 := NewLinksTo(ts, fixed_obj, graph.Object) + lto2 := NewLinksTo(ts, fixed_pred, graph.Predicate) + and := NewAnd() + and.AddSubIterator(lto1) + and.AddSubIterator(lto2) + hasa := NewHasA(ts, and, graph.Subject) + return hasa +} + +func TestQueryShape(t *testing.T) { + var queryShape map[string]interface{} + ts := new(TestTripleStore) + ts.On("GetIdFor", "cool").Return(1) + ts.On("GetNameFor", 1).Return("cool") + ts.On("GetIdFor", "status").Return(2) + ts.On("GetNameFor", 2).Return("status") + ts.On("GetIdFor", "fun").Return(3) + ts.On("GetNameFor", 3).Return("fun") + ts.On("GetIdFor", "name").Return(4) + ts.On("GetNameFor", 4).Return("name") + + Convey("Given a single linkage iterator's shape", t, func() { + queryShape = make(map[string]interface{}) + hasa := buildHasaWithTag(ts, "tag", "cool") + hasa.AddTag("top") + OutputQueryShapeForIterator(hasa, ts, &queryShape) + + Convey("It should have three nodes and one link", func() { + nodes := queryShape["nodes"].([]Node) + links := queryShape["links"].([]Link) + So(len(nodes), ShouldEqual, 3) + So(len(links), ShouldEqual, 1) + }) + + Convey("These nodes should be correctly tagged", func() { + nodes := queryShape["nodes"].([]Node) + So(nodes[0].Tags, ShouldResemble, []string{"tag"}) + So(nodes[1].IsLinkNode, ShouldEqual, true) + So(nodes[2].Tags, ShouldResemble, []string{"top"}) + + }) + + Convey("The link should be correctly typed", func() { + nodes := queryShape["nodes"].([]Node) + links := queryShape["links"].([]Link) + So(links[0].Source, ShouldEqual, nodes[2].Id) + So(links[0].Target, ShouldEqual, nodes[0].Id) + So(links[0].LinkNode, ShouldEqual, nodes[1].Id) + 
So(links[0].Pred, ShouldEqual, 0) + + }) + + }) + + Convey("Given a name-of-an-and-iterator's shape", t, func() { + queryShape = make(map[string]interface{}) + hasa1 := buildHasaWithTag(ts, "tag1", "cool") + hasa1.AddTag("hasa1") + hasa2 := buildHasaWithTag(ts, "tag2", "fun") + hasa1.AddTag("hasa2") + andInternal := NewAnd() + andInternal.AddSubIterator(hasa1) + andInternal.AddSubIterator(hasa2) + fixed_pred := ts.FixedIterator() + fixed_pred.AddValue(ts.GetIdFor("name")) + lto1 := NewLinksTo(ts, andInternal, graph.Subject) + lto2 := NewLinksTo(ts, fixed_pred, graph.Predicate) + and := NewAnd() + and.AddSubIterator(lto1) + and.AddSubIterator(lto2) + hasa := NewHasA(ts, and, graph.Object) + OutputQueryShapeForIterator(hasa, ts, &queryShape) + + Convey("It should have seven nodes and three links", func() { + nodes := queryShape["nodes"].([]Node) + links := queryShape["links"].([]Link) + So(len(nodes), ShouldEqual, 7) + So(len(links), ShouldEqual, 3) + }) + + Convey("Three of the nodes are link nodes, four aren't", func() { + nodes := queryShape["nodes"].([]Node) + count := 0 + for _, node := range nodes { + if node.IsLinkNode { + count++ + } + } + So(count, ShouldEqual, 3) + }) + + Convey("These nodes should be correctly tagged", nil) + + }) + +} diff --git a/graph/iterator/value_comparison_iterator.go b/graph/iterator/value_comparison_iterator.go new file mode 100644 index 0000000..70b0978 --- /dev/null +++ b/graph/iterator/value_comparison_iterator.go @@ -0,0 +1,190 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// "Value Comparison" is a unary operator -- a filter across the values in the +// relevant subiterator. +// +// This is hugely useful for things like provenance, but value ranges in general +// come up from time to time. At *worst* we're as big as our underlying iterator. +// At best, we're the null iterator. +// +// This is ripe for backend-side optimization. If you can run a value iterator, +// from a sorted set -- some sort of value index, then go for it. +// +// In MQL terms, this is the [{"age>=": 21}] concept. + +import ( + "fmt" + "log" + "strconv" + "strings" + + "github.com/google/cayley/graph" +) + +type Operator int + +const ( + kCompareLT Operator = iota + kCompareLTE + kCompareGT + kCompareGTE + // Why no Equals? Because that's usually an AndIterator. +) + +type Comparison struct { + Base + subIt graph.Iterator + op Operator + val interface{} + ts graph.TripleStore +} + +func NewComparison(sub graph.Iterator, op Operator, val interface{}, ts graph.TripleStore) *Comparison { + var vc Comparison + BaseInit(&vc.Base) + vc.subIt = sub + vc.op = op + vc.val = val + vc.ts = ts + return &vc +} + +// Here's the non-boilerplate part of the ValueComparison iterator. Given a value +// and our operator, determine whether or not we meet the requirement. +func (it *Comparison) doComparison(val graph.TSVal) bool { + //TODO(barakmich): Implement string comparison. 
+ nodeStr := it.ts.GetNameFor(val) + switch cVal := it.val.(type) { + case int: + cInt := int64(cVal) + intVal, err := strconv.ParseInt(nodeStr, 10, 64) + if err != nil { + return false + } + return RunIntOp(intVal, it.op, cInt) + case int64: + intVal, err := strconv.ParseInt(nodeStr, 10, 64) + if err != nil { + return false + } + return RunIntOp(intVal, it.op, cVal) + default: + return true + } +} + +func (it *Comparison) Close() { + it.subIt.Close() +} + +func RunIntOp(a int64, op Operator, b int64) bool { + switch op { + case kCompareLT: + return a < b + case kCompareLTE: + return a <= b + case kCompareGT: + return a > b + case kCompareGTE: + return a >= b + default: + log.Fatal("Unknown operator type") + return false + } +} + +func (it *Comparison) Reset() { + it.subIt.Reset() +} + +func (it *Comparison) Clone() graph.Iterator { + out := NewComparison(it.subIt.Clone(), it.op, it.val, it.ts) + out.CopyTagsFrom(it) + return out +} + +func (it *Comparison) Next() (graph.TSVal, bool) { + var val graph.TSVal + var ok bool + for { + val, ok = it.subIt.Next() + if !ok { + return nil, false + } + if it.doComparison(val) { + break + } + } + it.Last = val + return val, ok +} + +func (it *Comparison) NextResult() bool { + for { + hasNext := it.subIt.NextResult() + if !hasNext { + return false + } + if it.doComparison(it.subIt.LastResult()) { + return true + } + } + it.Last = it.subIt.LastResult() + return true +} + +func (it *Comparison) Check(val graph.TSVal) bool { + if !it.doComparison(val) { + return false + } + return it.subIt.Check(val) +} + +// If we failed the check, then the subiterator should not contribute to the result +// set. Otherwise, go ahead and tag it. +func (it *Comparison) TagResults(out *map[string]graph.TSVal) { + it.Base.TagResults(out) + it.subIt.TagResults(out) +} + +// Registers the value-comparison iterator. +func (it *Comparison) Type() string { return "value-comparison" } + +// Prints the value-comparison and its subiterator. 
+func (it *Comparison) DebugString(indent int) string { + return fmt.Sprintf("%s(%s\n%s)", + strings.Repeat(" ", indent), + it.Type(), it.subIt.DebugString(indent+4)) +} + +// There's nothing to optimize, locally, for a value-comparison iterator. +// Replace the underlying iterator if need be. +// potentially replace it. +func (it *Comparison) Optimize() (graph.Iterator, bool) { + newSub, changed := it.subIt.Optimize() + if changed { + it.subIt.Close() + it.subIt = newSub + } + return it, false +} + +// We're only as expensive as our subiterator. +// Again, optimized value comparison iterators should do better. +func (it *Comparison) GetStats() *graph.IteratorStats { + return it.subIt.GetStats() +} diff --git a/graph/iterator/value_comparison_iterator_test.go b/graph/iterator/value_comparison_iterator_test.go new file mode 100644 index 0000000..5e80f71 --- /dev/null +++ b/graph/iterator/value_comparison_iterator_test.go @@ -0,0 +1,128 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package iterator + +import ( + "testing" + + "github.com/google/cayley/graph" +) + +func SetupMockTripleStore(nameMap map[string]int) *TestTripleStore { + ts := new(TestTripleStore) + for k, v := range nameMap { + ts.On("GetIdFor", k).Return(v) + ts.On("GetNameFor", v).Return(k) + } + return ts +} + +func SimpleValueTripleStore() *TestTripleStore { + ts := SetupMockTripleStore(map[string]int{ + "0": 0, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + }) + return ts +} + +func BuildFixedIterator() *Fixed { + fixed := newFixed() + fixed.AddValue(0) + fixed.AddValue(1) + fixed.AddValue(2) + fixed.AddValue(3) + fixed.AddValue(4) + return fixed +} + +func checkIteratorContains(ts graph.TripleStore, it graph.Iterator, expected []string, t *testing.T) { + var actual []string + actual = nil + for { + val, ok := it.Next() + if !ok { + break + } + actual = append(actual, ts.GetNameFor(val)) + } + actualSet := actual[:] + for _, a := range expected { + found := false + for j, b := range actualSet { + if a == b { + actualSet = append(actualSet[:j], actualSet[j+1:]...) 
+ found = true + break + } + } + if !found { + t.Error("Couldn't find", a, "in actual output.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) + return + } + } + if len(actualSet) != 0 { + t.Error("Actual output has more than expected.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) + } +} + +func TestWorkingIntValueComparison(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewComparison(fixed, kCompareLT, int64(3), ts) + checkIteratorContains(ts, vc, []string{"0", "1", "2"}, t) +} + +func TestFailingIntValueComparison(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewComparison(fixed, kCompareLT, int64(0), ts) + checkIteratorContains(ts, vc, []string{}, t) +} + +func TestWorkingGT(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewComparison(fixed, kCompareGT, int64(2), ts) + checkIteratorContains(ts, vc, []string{"3", "4"}, t) +} + +func TestWorkingGTE(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewComparison(fixed, kCompareGTE, int64(2), ts) + checkIteratorContains(ts, vc, []string{"2", "3", "4"}, t) +} + +func TestVCICheck(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewComparison(fixed, kCompareGTE, int64(2), ts) + if vc.Check(1) { + t.Error("1 is less than 2, should be GTE") + } + if !vc.Check(2) { + t.Error("2 is GTE 2") + } + if !vc.Check(3) { + t.Error("3 is GTE 2") + } + if vc.Check(5) { + t.Error("5 is not in the underlying iterator") + } +} diff --git a/graph/leveldb/all_iterator.go b/graph/leveldb/all_iterator.go index 35d403f..6303b84 100644 --- a/graph/leveldb/all_iterator.go +++ b/graph/leveldb/all_iterator.go @@ -19,25 +19,26 @@ import ( "fmt" "strings" - "github.com/syndtr/goleveldb/leveldb/iterator" + ldbit "github.com/syndtr/goleveldb/leveldb/iterator" 
"github.com/syndtr/goleveldb/leveldb/opt" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type AllIterator struct { - graph.BaseIterator + iterator.Base prefix []byte dir graph.Direction open bool - it iterator.Iterator + it ldbit.Iterator ts *TripleStore ro *opt.ReadOptions } func NewAllIterator(prefix string, d graph.Direction, ts *TripleStore) *AllIterator { var it AllIterator - graph.BaseIteratorInit(&it.BaseIterator) + iterator.BaseInit(&it.Base) it.ro = &opt.ReadOptions{} it.ro.DontFillCache = true it.it = ts.db.NewIterator(nil, it.ro) diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index bb5fa07..40d2b7b 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -19,19 +19,20 @@ import ( "fmt" "strings" - "github.com/syndtr/goleveldb/leveldb/iterator" + ldbit "github.com/syndtr/goleveldb/leveldb/iterator" "github.com/syndtr/goleveldb/leveldb/opt" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type Iterator struct { - graph.BaseIterator + iterator.Base nextPrefix []byte checkId []byte dir graph.Direction open bool - it iterator.Iterator + it ldbit.Iterator ts *TripleStore ro *opt.ReadOptions originalPrefix string @@ -39,7 +40,7 @@ type Iterator struct { func NewIterator(prefix string, d graph.Direction, value graph.TSVal, ts *TripleStore) *Iterator { var it Iterator - graph.BaseIteratorInit(&it.BaseIterator) + iterator.BaseInit(&it.Base) it.checkId = value.([]byte) it.dir = d it.originalPrefix = prefix diff --git a/graph/leveldb/leveldb_test.go b/graph/leveldb/leveldb_test.go index 91263c6..9982eea 100644 --- a/graph/leveldb/leveldb_test.go +++ b/graph/leveldb/leveldb_test.go @@ -23,21 +23,22 @@ import ( . 
"github.com/smartystreets/goconvey/convey" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func makeTripleSet() []*graph.Triple { tripleSet := []*graph.Triple{ - graph.MakeTriple("A", "follows", "B", ""), - graph.MakeTriple("C", "follows", "B", ""), - graph.MakeTriple("C", "follows", "D", ""), - graph.MakeTriple("D", "follows", "B", ""), - graph.MakeTriple("B", "follows", "F", ""), - graph.MakeTriple("F", "follows", "G", ""), - graph.MakeTriple("D", "follows", "G", ""), - graph.MakeTriple("E", "follows", "F", ""), - graph.MakeTriple("B", "status", "cool", "status_graph"), - graph.MakeTriple("D", "status", "cool", "status_graph"), - graph.MakeTriple("G", "status", "cool", "status_graph"), + {"A", "follows", "B", ""}, + {"C", "follows", "B", ""}, + {"C", "follows", "D", ""}, + {"D", "follows", "B", ""}, + {"B", "follows", "F", ""}, + {"F", "follows", "G", ""}, + {"D", "follows", "G", ""}, + {"E", "follows", "F", ""}, + {"B", "status", "cool", "status_graph"}, + {"D", "status", "cool", "status_graph"}, + {"G", "status", "cool", "status_graph"}, } return tripleSet } @@ -49,7 +50,7 @@ func extractTripleFromIterator(ts graph.TripleStore, it graph.Iterator) []string if !ok { break } - output = append(output, ts.GetTriple(val).ToString()) + output = append(output, ts.GetTriple(val).String()) } return output } @@ -111,7 +112,7 @@ func TestLoadDatabase(t *testing.T) { ts = NewTripleStore(tmpDir, nil) Convey("Can load a single triple", func() { - ts.AddTriple(graph.MakeTriple("Something", "points_to", "Something Else", "context")) + ts.AddTriple(&graph.Triple{"Something", "points_to", "Something Else", "context"}) So(ts.GetNameFor(ts.GetIdFor("Something")), ShouldEqual, "Something") So(ts.Size(), ShouldEqual, 1) }) @@ -123,7 +124,7 @@ func TestLoadDatabase(t *testing.T) { So(ts.GetSizeFor(ts.GetIdFor("B")), ShouldEqual, 5) Convey("Can delete triples", func() { - ts.RemoveTriple(graph.MakeTriple("A", "follows", "B", "")) + 
ts.RemoveTriple(&graph.Triple{"A", "follows", "B", ""}) So(ts.Size(), ShouldEqual, 10) So(ts.GetSizeFor(ts.GetIdFor("B")), ShouldEqual, 4) }) @@ -220,9 +221,9 @@ func TestIterator(t *testing.T) { set := makeTripleSet() var string_set []string for _, t := range set { - string_set = append(string_set, t.ToString()) + string_set = append(string_set, t.String()) } - So(triple.ToString(), ShouldBeIn, string_set) + So(triple.String(), ShouldBeIn, string_set) }) Reset(func() { @@ -252,8 +253,8 @@ func TestSetIterator(t *testing.T) { Convey("Containing the right things", func() { expected := []string{ - graph.MakeTriple("C", "follows", "B", "").ToString(), - graph.MakeTriple("C", "follows", "D", "").ToString(), + (&graph.Triple{"C", "follows", "B", ""}).String(), + (&graph.Triple{"C", "follows", "D", ""}).String(), } actual := extractTripleFromIterator(ts, it) sort.Strings(actual) @@ -262,13 +263,13 @@ func TestSetIterator(t *testing.T) { }) Convey("And checkable", func() { - and := graph.NewAndIterator() + and := iterator.NewAnd() and.AddSubIterator(ts.GetTriplesAllIterator()) and.AddSubIterator(it) expected := []string{ - graph.MakeTriple("C", "follows", "B", "").ToString(), - graph.MakeTriple("C", "follows", "D", "").ToString(), + (&graph.Triple{"C", "follows", "B", ""}).String(), + (&graph.Triple{"C", "follows", "D", ""}).String(), } actual := extractTripleFromIterator(ts, and) sort.Strings(actual) @@ -286,8 +287,8 @@ func TestSetIterator(t *testing.T) { Convey("Containing the right things", func() { expected := []string{ - graph.MakeTriple("B", "follows", "F", "").ToString(), - graph.MakeTriple("E", "follows", "F", "").ToString(), + (&graph.Triple{"B", "follows", "F", ""}).String(), + (&graph.Triple{"E", "follows", "F", ""}).String(), } actual := extractTripleFromIterator(ts, it) sort.Strings(actual) @@ -296,12 +297,12 @@ func TestSetIterator(t *testing.T) { }) Convey("Mutually and-checkable", func() { - and := graph.NewAndIterator() + and := iterator.NewAnd() 
and.AddSubIterator(ts.GetTripleIterator(graph.Subject, ts.GetIdFor("B"))) and.AddSubIterator(it) expected := []string{ - graph.MakeTriple("B", "follows", "F", "").ToString(), + (&graph.Triple{"B", "follows", "F", ""}).String(), } actual := extractTripleFromIterator(ts, and) sort.Strings(actual) @@ -316,9 +317,9 @@ func TestSetIterator(t *testing.T) { Convey("Containing the right things", func() { expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("D", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("G", "status", "cool", "status_graph").ToString(), + (&graph.Triple{"B", "status", "cool", "status_graph"}).String(), + (&graph.Triple{"D", "status", "cool", "status_graph"}).String(), + (&graph.Triple{"G", "status", "cool", "status_graph"}).String(), } actual := extractTripleFromIterator(ts, it) sort.Strings(actual) @@ -333,9 +334,9 @@ func TestSetIterator(t *testing.T) { Convey("Containing the right things", func() { expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("D", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("G", "status", "cool", "status_graph").ToString(), + (&graph.Triple{"B", "status", "cool", "status_graph"}).String(), + (&graph.Triple{"D", "status", "cool", "status_graph"}).String(), + (&graph.Triple{"G", "status", "cool", "status_graph"}).String(), } actual := extractTripleFromIterator(ts, it) sort.Strings(actual) @@ -344,26 +345,26 @@ func TestSetIterator(t *testing.T) { }) Convey("Can be cross-checked", func() { - and := graph.NewAndIterator() + and := iterator.NewAnd() // Order is important and.AddSubIterator(ts.GetTripleIterator(graph.Subject, ts.GetIdFor("B"))) and.AddSubIterator(it) expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), + (&graph.Triple{"B", "status", "cool", "status_graph"}).String(), } actual := extractTripleFromIterator(ts, and) 
So(actual, ShouldResemble, expected) }) Convey("Can check against other iterators", func() { - and := graph.NewAndIterator() + and := iterator.NewAnd() // Order is important and.AddSubIterator(it) and.AddSubIterator(ts.GetTripleIterator(graph.Subject, ts.GetIdFor("B"))) expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), + (&graph.Triple{"B", "status", "cool", "status_graph"}).String(), } actual := extractTripleFromIterator(ts, and) So(actual, ShouldResemble, expected) @@ -397,10 +398,10 @@ func TestOptimize(t *testing.T) { ts.AddTripleSet(makeTripleSet()) Convey("With an linksto-fixed pair", func() { - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() fixed.AddValue(ts.GetIdFor("F")) fixed.AddTag("internal") - lto = graph.NewLinksToIterator(ts, fixed, graph.Object) + lto = iterator.NewLinksTo(ts, fixed, graph.Object) Convey("Creates an appropriate iterator", func() { oldIt := lto.Clone() diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index e386bf4..0771f9c 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -29,6 +29,7 @@ import ( "github.com/syndtr/goleveldb/leveldb/util" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) const ( @@ -143,7 +144,7 @@ func (ts *TripleStore) AddTriple(t *graph.Triple) { ts.buildWrite(batch, t) err := ts.db.Write(batch, ts.writeopts) if err != nil { - glog.Errorf("Couldn't write to DB for triple %s", t.ToString()) + glog.Errorf("Couldn't write to DB for triple %s", t) return } ts.size++ @@ -180,7 +181,7 @@ func (ts *TripleStore) RemoveTriple(t *graph.Triple) { } err = ts.db.Write(batch, nil) if err != nil { - glog.Errorf("Couldn't delete triple %s", t.ToString()) + glog.Errorf("Couldn't delete triple %s", t) return } ts.size-- @@ -189,7 +190,7 @@ func (ts *TripleStore) RemoveTriple(t *graph.Triple) { func (ts *TripleStore) buildTripleWrite(batch *leveldb.Batch, t *graph.Triple) { bytes, err := json.Marshal(*t) 
if err != nil { - glog.Errorf("Couldn't write to buffer for triple %s\n %s\n", t.ToString(), err) + glog.Errorf("Couldn't write to buffer for triple %s\n %s\n", t, err) return } batch.Put(ts.createKeyFor(spo, t), bytes) @@ -439,6 +440,6 @@ func compareBytes(a, b graph.TSVal) bool { return bytes.Equal(a.([]uint8), b.([]uint8)) } -func (ts *TripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(compareBytes) +func (ts *TripleStore) FixedIterator() graph.FixedIterator { + return iterator.NewFixedIteratorWithCompare(compareBytes) } diff --git a/graph/leveldb/triplestore_iterator_optimize.go b/graph/leveldb/triplestore_iterator_optimize.go index cf31f71..28cc46e 100644 --- a/graph/leveldb/triplestore_iterator_optimize.go +++ b/graph/leveldb/triplestore_iterator_optimize.go @@ -16,18 +16,19 @@ package leveldb import ( "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { switch it.Type() { case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) + return ts.optimizeLinksTo(it.(*iterator.LinksTo)) } return it, false } -func (ts *TripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { +func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { subs := it.GetSubIterators() if len(subs) != 1 { return it, false diff --git a/graph/linksto_iterator.go b/graph/linksto_iterator.go deleted file mode 100644 index b67c306..0000000 --- a/graph/linksto_iterator.go +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the LinksTo iterator. A LinksTo takes a -// subiterator of nodes, and contains an iteration of links which "link to" -// those nodes in a given direction. -// -// Next()ing a LinksTo is straightforward -- iterate through all links to // -// things in the subiterator, and then advance the subiterator, and do it again. -// LinksTo is therefore sensitive to growing with a fanout. (A small-sized -// subiterator could cause LinksTo to be large). -// -// Check()ing a LinksTo means, given a link, take the direction we care about -// and check if it's in our subiterator. Checking is therefore fairly cheap, and -// similar to checking the subiterator alone. -// -// Can be seen as the dual of the HasA iterator. - -import ( - "fmt" - "strings" -) - -// A LinksTo has a reference back to the TripleStore (to create the iterators -// for each node) the subiterator, and the direction the iterator comes from. -// `next_it` is the tempoarary iterator held per result in `primary_it`. -type LinksToIterator struct { - BaseIterator - ts TripleStore - primaryIt Iterator - dir Direction - nextIt Iterator -} - -// Construct a new LinksTo iterator around a direction and a subiterator of -// nodes. 
-func NewLinksToIterator(ts TripleStore, it Iterator, d Direction) *LinksToIterator { - var lto LinksToIterator - BaseIteratorInit(<o.BaseIterator) - lto.ts = ts - lto.primaryIt = it - lto.dir = d - lto.nextIt = &NullIterator{} - return <o -} - -func (it *LinksToIterator) Reset() { - it.primaryIt.Reset() - if it.nextIt != nil { - it.nextIt.Close() - } - it.nextIt = &NullIterator{} -} - -func (it *LinksToIterator) Clone() Iterator { - out := NewLinksToIterator(it.ts, it.primaryIt.Clone(), it.dir) - out.CopyTagsFrom(it) - return out -} - -// Return the direction under consideration. -func (it *LinksToIterator) Direction() Direction { return it.dir } - -// Tag these results, and our subiterator's results. -func (it *LinksToIterator) TagResults(out *map[string]TSVal) { - it.BaseIterator.TagResults(out) - it.primaryIt.TagResults(out) -} - -// DEPRECATED -func (it *LinksToIterator) GetResultTree() *ResultTree { - tree := NewResultTree(it.LastResult()) - tree.AddSubtree(it.primaryIt.GetResultTree()) - return tree -} - -// Print the iterator. -func (it *LinksToIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s %d direction:%s\n%s)", - strings.Repeat(" ", indent), - it.Type(), it.GetUid(), it.dir, it.primaryIt.DebugString(indent+4)) -} - -// If it checks in the right direction for the subiterator, it is a valid link -// for the LinksTo. -func (it *LinksToIterator) Check(val TSVal) bool { - CheckLogIn(it, val) - node := it.ts.GetTripleDirection(val, it.dir) - if it.primaryIt.Check(node) { - it.Last = val - return CheckLogOut(it, val, true) - } - return CheckLogOut(it, val, false) -} - -// Return a list containing only our subiterator. -func (it *LinksToIterator) GetSubIterators() []Iterator { - return []Iterator{it.primaryIt} -} - -// Optimize the LinksTo, by replacing it if it can be. 
-func (it *LinksToIterator) Optimize() (Iterator, bool) { - newPrimary, changed := it.primaryIt.Optimize() - if changed { - it.primaryIt = newPrimary - if it.primaryIt.Type() == "null" { - it.nextIt.Close() - return it.primaryIt, true - } - } - // Ask the TripleStore if we can be replaced. Often times, this is a great - // optimization opportunity (there's a fixed iterator underneath us, for - // example). - newReplacement, hasOne := it.ts.OptimizeIterator(it) - if hasOne { - it.Close() - return newReplacement, true - } - return it, false -} - -// Next()ing a LinksTo operates as described above. -func (it *LinksToIterator) Next() (TSVal, bool) { - NextLogIn(it) - val, ok := it.nextIt.Next() - if !ok { - // Subiterator is empty, get another one - candidate, ok := it.primaryIt.Next() - if !ok { - // We're out of nodes in our subiterator, so we're done as well. - return NextLogOut(it, 0, false) - } - it.nextIt.Close() - it.nextIt = it.ts.GetTripleIterator(it.dir, candidate) - // Recurse -- return the first in the next set. - return it.Next() - } - it.Last = val - return NextLogOut(it, val, ok) -} - -// Close our subiterators. -func (it *LinksToIterator) Close() { - it.nextIt.Close() - it.primaryIt.Close() -} - -// We won't ever have a new result, but our subiterators might. -func (it *LinksToIterator) NextResult() bool { - return it.primaryIt.NextResult() -} - -// Register the LinksTo. -func (it *LinksToIterator) Type() string { return "linksto" } - -// Return a guess as to how big or costly it is to next the iterator. 
-func (it *LinksToIterator) GetStats() *IteratorStats { - subitStats := it.primaryIt.GetStats() - // TODO(barakmich): These should really come from the triplestore itself - fanoutFactor := int64(20) - checkConstant := int64(1) - nextConstant := int64(2) - return &IteratorStats{ - NextCost: nextConstant + subitStats.NextCost, - CheckCost: checkConstant + subitStats.CheckCost, - Size: fanoutFactor * subitStats.Size, - } -} diff --git a/graph/linksto_iterator_test.go b/graph/linksto_iterator_test.go deleted file mode 100644 index 7fbed33..0000000 --- a/graph/linksto_iterator_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph - -import ( - "testing" -) - -func TestLinksTo(t *testing.T) { - ts := new(TestTripleStore) - tsFixed := newFixedIterator() - tsFixed.AddValue(2) - ts.On("GetIdFor", "cool").Return(1) - ts.On("GetTripleIterator", Object, 1).Return(tsFixed) - fixed := newFixedIterator() - fixed.AddValue(ts.GetIdFor("cool")) - lto := NewLinksToIterator(ts, fixed, Object) - val, ok := lto.Next() - if !ok { - t.Error("At least one triple matches the fixed object") - } - if val != 2 { - t.Errorf("Triple index 2, such as %s, should match %s", ts.GetTriple(2), ts.GetTriple(val)) - } -} diff --git a/graph/memstore/all_iterator.go b/graph/memstore/all_iterator.go index 8211e06..f2c5b8e 100644 --- a/graph/memstore/all_iterator.go +++ b/graph/memstore/all_iterator.go @@ -16,22 +16,23 @@ package memstore import ( "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type AllIterator struct { - graph.Int64AllIterator + iterator.Int64 ts *TripleStore } func NewMemstoreAllIterator(ts *TripleStore) *AllIterator { var out AllIterator - out.Int64AllIterator = *graph.NewInt64AllIterator(1, ts.idCounter-1) + out.Int64 = *iterator.NewInt64(1, ts.idCounter-1) out.ts = ts return &out } func (it *AllIterator) Next() (graph.TSVal, bool) { - next, out := it.Int64AllIterator.Next() + next, out := it.Int64.Next() if !out { return next, out } diff --git a/graph/memstore/iterator.go b/graph/memstore/iterator.go index 307027d..9551fa1 100644 --- a/graph/memstore/iterator.go +++ b/graph/memstore/iterator.go @@ -22,10 +22,11 @@ import ( "github.com/petar/GoLLRB/llrb" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type Iterator struct { - graph.BaseIterator + iterator.Base tree *llrb.LLRB data string isRunning bool @@ -53,7 +54,7 @@ func IterateOne(tree *llrb.LLRB, last Int64) Int64 { func NewLlrbIterator(tree *llrb.LLRB, data string) *Iterator { var it Iterator - graph.BaseIteratorInit(&it.BaseIterator) + iterator.BaseInit(&it.Base) it.tree 
= tree it.iterLast = Int64(-1) it.data = data diff --git a/graph/memstore/testing_memstore.go b/graph/memstore/testing_memstore.go index 8ac2c10..e32dc5d 100644 --- a/graph/memstore/testing_memstore.go +++ b/graph/memstore/testing_memstore.go @@ -30,16 +30,16 @@ import "github.com/google/cayley/graph" func MakeTestingMemstore() *TripleStore { ts := NewTripleStore() - ts.AddTriple(graph.MakeTriple("A", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("C", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("C", "follows", "D", "")) - ts.AddTriple(graph.MakeTriple("D", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("B", "follows", "F", "")) - ts.AddTriple(graph.MakeTriple("F", "follows", "G", "")) - ts.AddTriple(graph.MakeTriple("D", "follows", "G", "")) - ts.AddTriple(graph.MakeTriple("E", "follows", "F", "")) - ts.AddTriple(graph.MakeTriple("B", "status", "cool", "status_graph")) - ts.AddTriple(graph.MakeTriple("D", "status", "cool", "status_graph")) - ts.AddTriple(graph.MakeTriple("G", "status", "cool", "status_graph")) + ts.AddTriple(&graph.Triple{"A", "follows", "B", ""}) + ts.AddTriple(&graph.Triple{"C", "follows", "B", ""}) + ts.AddTriple(&graph.Triple{"C", "follows", "D", ""}) + ts.AddTriple(&graph.Triple{"D", "follows", "B", ""}) + ts.AddTriple(&graph.Triple{"B", "follows", "F", ""}) + ts.AddTriple(&graph.Triple{"F", "follows", "G", ""}) + ts.AddTriple(&graph.Triple{"D", "follows", "G", ""}) + ts.AddTriple(&graph.Triple{"E", "follows", "F", ""}) + ts.AddTriple(&graph.Triple{"B", "status", "cool", "status_graph"}) + ts.AddTriple(&graph.Triple{"D", "status", "cool", "status_graph"}) + ts.AddTriple(&graph.Triple{"G", "status", "cool", "status_graph"}) return ts } diff --git a/graph/memstore/triplestore.go b/graph/memstore/triplestore.go index 76e0ba0..5bc5d0c 100644 --- a/graph/memstore/triplestore.go +++ b/graph/memstore/triplestore.go @@ -19,6 +19,7 @@ import ( "github.com/barakmich/glog" "github.com/google/cayley/graph" + 
"github.com/google/cayley/graph/iterator" "github.com/petar/GoLLRB/llrb" ) @@ -226,7 +227,7 @@ func (ts *TripleStore) GetTripleIterator(d graph.Direction, value graph.TSVal) g if ok { return NewLlrbIterator(index, data) } - return &graph.NullIterator{} + return &iterator.Null{} } func (ts *TripleStore) Size() int64 { @@ -238,7 +239,7 @@ func (ts *TripleStore) DebugPrint() { if i == 0 { continue } - glog.V(2).Infoln("%d: %s", i, t.ToString()) + glog.V(2).Infoln("%d: %s", i, t) } } @@ -251,11 +252,11 @@ func (ts *TripleStore) GetNameFor(id graph.TSVal) string { } func (ts *TripleStore) GetTriplesAllIterator() graph.Iterator { - return graph.NewInt64AllIterator(0, ts.Size()) + return iterator.NewInt64(0, ts.Size()) } -func (ts *TripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(graph.BasicEquality) +func (ts *TripleStore) FixedIterator() graph.FixedIterator { + return iterator.NewFixedIteratorWithCompare(iterator.BasicEquality) } func (ts *TripleStore) GetTripleDirection(val graph.TSVal, d graph.Direction) graph.TSVal { diff --git a/graph/memstore/triplestore_iterator_optimize.go b/graph/memstore/triplestore_iterator_optimize.go index 165952d..51ec958 100644 --- a/graph/memstore/triplestore_iterator_optimize.go +++ b/graph/memstore/triplestore_iterator_optimize.go @@ -16,18 +16,19 @@ package memstore import ( "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { switch it.Type() { case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) + return ts.optimizeLinksTo(it.(*iterator.LinksTo)) } return it, false } -func (ts *TripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { +func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { subs := it.GetSubIterators() if len(subs) != 1 { return it, false diff --git a/graph/memstore/triplestore_test.go 
b/graph/memstore/triplestore_test.go index 4d98c0c..15c5482 100644 --- a/graph/memstore/triplestore_test.go +++ b/graph/memstore/triplestore_test.go @@ -21,6 +21,7 @@ import ( . "github.com/smartystreets/goconvey/convey" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func TestMemstore(t *testing.T) { @@ -38,19 +39,19 @@ func TestMemstore(t *testing.T) { func TestIteratorsAndNextResultOrderA(t *testing.T) { ts := MakeTestingMemstore() - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() fixed.AddValue(ts.GetIdFor("C")) all := ts.GetNodesAllIterator() - lto := graph.NewLinksToIterator(ts, all, graph.Object) - innerAnd := graph.NewAndIterator() + lto := iterator.NewLinksTo(ts, all, graph.Object) + innerAnd := iterator.NewAnd() - fixed2 := ts.MakeFixed() + fixed2 := ts.FixedIterator() fixed2.AddValue(ts.GetIdFor("follows")) - lto2 := graph.NewLinksToIterator(ts, fixed2, graph.Predicate) + lto2 := iterator.NewLinksTo(ts, fixed2, graph.Predicate) innerAnd.AddSubIterator(lto2) innerAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(ts, innerAnd, graph.Subject) - outerAnd := graph.NewAndIterator() + hasa := iterator.NewHasA(ts, innerAnd, graph.Subject) + outerAnd := iterator.NewAnd() outerAnd.AddSubIterator(fixed) outerAnd.AddSubIterator(hasa) val, ok := outerAnd.Next() @@ -96,9 +97,9 @@ func CompareStringSlices(t *testing.T, expected []string, actual []string) { func TestLinksToOptimization(t *testing.T) { ts := MakeTestingMemstore() - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() fixed.AddValue(ts.GetIdFor("cool")) - lto := graph.NewLinksToIterator(ts, fixed, graph.Object) + lto := iterator.NewLinksTo(ts, fixed, graph.Object) lto.AddTag("foo") newIt, changed := lto.Optimize() if !changed { @@ -119,17 +120,17 @@ func TestLinksToOptimization(t *testing.T) { func TestRemoveTriple(t *testing.T) { ts := MakeTestingMemstore() - ts.RemoveTriple(graph.MakeTriple("E", "follows", "F", "")) - fixed := ts.MakeFixed() + 
ts.RemoveTriple(&graph.Triple{"E", "follows", "F", ""}) + fixed := ts.FixedIterator() fixed.AddValue(ts.GetIdFor("E")) - lto := graph.NewLinksToIterator(ts, fixed, graph.Subject) - fixed2 := ts.MakeFixed() + lto := iterator.NewLinksTo(ts, fixed, graph.Subject) + fixed2 := ts.FixedIterator() fixed2.AddValue(ts.GetIdFor("follows")) - lto2 := graph.NewLinksToIterator(ts, fixed2, graph.Predicate) - innerAnd := graph.NewAndIterator() + lto2 := iterator.NewLinksTo(ts, fixed2, graph.Predicate) + innerAnd := iterator.NewAnd() innerAnd.AddSubIterator(lto2) innerAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(ts, innerAnd, graph.Object) + hasa := iterator.NewHasA(ts, innerAnd, graph.Object) newIt, _ := hasa.Optimize() _, ok := newIt.Next() if ok { diff --git a/graph/mock_ts.go b/graph/mock_ts.go deleted file mode 100644 index 1145f9d..0000000 --- a/graph/mock_ts.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// A quickly mocked version of the TripleStore interface, for use in tests. -// Can better used Mock.Called but will fill in as needed. 
- -import ( - "github.com/stretchrcom/testify/mock" -) - -type TestTripleStore struct { - mock.Mock -} - -func (ts *TestTripleStore) GetIdFor(s string) TSVal { - args := ts.Mock.Called(s) - return args.Get(0) -} -func (ts *TestTripleStore) AddTriple(*Triple) {} -func (ts *TestTripleStore) AddTripleSet([]*Triple) {} -func (ts *TestTripleStore) GetTriple(TSVal) *Triple { return &Triple{} } -func (ts *TestTripleStore) GetTripleIterator(d Direction, i TSVal) Iterator { - args := ts.Mock.Called(d, i) - return args.Get(0).(Iterator) -} -func (ts *TestTripleStore) GetNodesAllIterator() Iterator { return &NullIterator{} } -func (ts *TestTripleStore) GetTriplesAllIterator() Iterator { return &NullIterator{} } -func (ts *TestTripleStore) GetIteratorByString(string, string, string) Iterator { - return &NullIterator{} -} -func (ts *TestTripleStore) GetNameFor(v TSVal) string { - args := ts.Mock.Called(v) - return args.Get(0).(string) -} -func (ts *TestTripleStore) Size() int64 { return 0 } -func (ts *TestTripleStore) DebugPrint() {} -func (ts *TestTripleStore) OptimizeIterator(it Iterator) (Iterator, bool) { - return &NullIterator{}, false -} -func (ts *TestTripleStore) MakeFixed() *FixedIterator { - return NewFixedIteratorWithCompare(BasicEquality) -} -func (ts *TestTripleStore) Close() {} -func (ts *TestTripleStore) GetTripleDirection(TSVal, Direction) TSVal { return 0 } -func (ts *TestTripleStore) RemoveTriple(t *Triple) {} diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index f6c1075..4888e10 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -23,10 +23,11 @@ import ( "labix.org/v2/mgo/bson" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type Iterator struct { - graph.BaseIterator + iterator.Base ts *TripleStore dir graph.Direction iter *mgo.Iter @@ -40,7 +41,7 @@ type Iterator struct { func NewIterator(ts *TripleStore, collection string, d graph.Direction, val graph.TSVal) *Iterator { var m Iterator - 
graph.BaseIteratorInit(&m.BaseIterator) + iterator.BaseInit(&m.Base) m.name = ts.GetNameFor(val) m.collection = collection diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 8fdd08f..2555882 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -25,6 +25,7 @@ import ( "github.com/barakmich/glog" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) const DefaultDBName = "cayley" @@ -214,11 +215,12 @@ func (ts *TripleStore) GetTriple(val graph.TSVal) *graph.Triple { if err != nil { log.Println("Error: Couldn't retrieve triple", val.(string), err) } - return graph.MakeTriple( - bsonDoc["Sub"].(string), - bsonDoc["Pred"].(string), - bsonDoc["Obj"].(string), - bsonDoc["Provenance"].(string)) + return &graph.Triple{ + bsonDoc["Subject"].(string), + bsonDoc["Predicate"].(string), + bsonDoc["Object"].(string), + bsonDoc["Provenance"].(string), + } } func (ts *TripleStore) GetTripleIterator(d graph.Direction, val graph.TSVal) graph.Iterator { @@ -264,8 +266,8 @@ func compareStrings(a, b graph.TSVal) bool { return a.(string) == b.(string) } -func (ts *TripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(compareStrings) +func (ts *TripleStore) FixedIterator() graph.FixedIterator { + return iterator.NewFixedIteratorWithCompare(compareStrings) } func (ts *TripleStore) Close() { @@ -303,9 +305,9 @@ func (ts *TripleStore) BulkLoad(t_chan chan *graph.Triple) { var p_key = this["_id"].slice(len / 4, 2 * len / 4) var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) var c_key = this["_id"].slice(3 * len / 4) - emit(s_key, {"_id": s_key, "Name" : this.Sub, "Size" : 1}) - emit(p_key, {"_id": p_key, "Name" : this.Pred, "Size" : 1}) - emit(o_key, {"_id": o_key, "Name" : this.Obj, "Size" : 1}) + emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1}) + emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1}) + emit(o_key, {"_id": o_key, "Name" : this.Object, 
"Size" : 1}) if (this.Provenance != "") { emit(c_key, {"_id": c_key, "Name" : this.Provenance, "Size" : 1}) } diff --git a/graph/mongo/triplestore_iterator_optimize.go b/graph/mongo/triplestore_iterator_optimize.go index b1e50db..cca903b 100644 --- a/graph/mongo/triplestore_iterator_optimize.go +++ b/graph/mongo/triplestore_iterator_optimize.go @@ -16,18 +16,19 @@ package mongo import ( "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func (ts *TripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { switch it.Type() { case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) + return ts.optimizeLinksTo(it.(*iterator.LinksTo)) } return it, false } -func (ts *TripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { +func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { subs := it.GetSubIterators() if len(subs) != 1 { return it, false diff --git a/graph/optional_iterator.go b/graph/optional_iterator.go deleted file mode 100644 index 789529a..0000000 --- a/graph/optional_iterator.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// "Optional" is kind of odd. It's not an iterator in the strictest sense, but -// it's easier to implement as an iterator. -// -// Consider what it means. 
It means that we have a subconstraint which we do -// not want to constrain the query -- we just want it to return the matching -// subgraph if one matches at all. By analogy to regular expressions, it is the -// '?' operator. -// -// If it were a proper iterator of its own (and indeed, a reasonable refactor -// of this iterator would be to make it such) it would contain an all iterator -// -- all things in the graph. It matches everything (as does the regex "(a)?") - -import ( - "fmt" - "strings" - - "github.com/barakmich/glog" -) - -// An optional iterator has the subconstraint iterator we wish to be optional -// and whether the last check we received was true or false. -type OptionalIterator struct { - BaseIterator - subIt Iterator - lastCheck bool -} - -// Creates a new optional iterator. -func NewOptionalIterator(it Iterator) *OptionalIterator { - var o OptionalIterator - BaseIteratorInit(&o.BaseIterator) - o.nextable = false - o.subIt = it - return &o -} - -func (it *OptionalIterator) Reset() { - it.subIt.Reset() - it.lastCheck = false -} - -func (it *OptionalIterator) Close() { - it.subIt.Close() -} - -func (it *OptionalIterator) Clone() Iterator { - out := NewOptionalIterator(it.subIt.Clone()) - out.CopyTagsFrom(it) - return out -} - -// Nexting the iterator is unsupported -- error and return an empty set. -// (As above, a reasonable alternative would be to Next() an all iterator) -func (it *OptionalIterator) Next() (TSVal, bool) { - glog.Errorln("Nexting an un-nextable iterator") - return nil, false -} - -// An optional iterator only has a next result if, (a) last time we checked -// we had any results whatsoever, and (b) there was another subresult in our -// optional subbranch. -func (it *OptionalIterator) NextResult() bool { - if it.lastCheck { - return it.subIt.NextResult() - } - return false -} - -// Check() is the real hack of this iterator. It always returns true, regardless -// of whether the subiterator matched. 
But we keep track of whether the subiterator -// matched for results purposes. -func (it *OptionalIterator) Check(val TSVal) bool { - checked := it.subIt.Check(val) - it.lastCheck = checked - it.Last = val - return true -} - -// If we failed the check, then the subiterator should not contribute to the result -// set. Otherwise, go ahead and tag it. -func (it *OptionalIterator) TagResults(out *map[string]TSVal) { - if it.lastCheck == false { - return - } - it.subIt.TagResults(out) -} - -// Registers the optional iterator. -func (it *OptionalIterator) Type() string { return "optional" } - -// Prints the optional and it's subiterator. -func (it *OptionalIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s tags:%s\n%s)", - strings.Repeat(" ", indent), - it.Type(), - it.Tags(), - it.subIt.DebugString(indent+4)) -} - -// There's nothing to optimize for an optional. Optimize the subiterator and -// potentially replace it. -func (it *OptionalIterator) Optimize() (Iterator, bool) { - newSub, changed := it.subIt.Optimize() - if changed { - it.subIt.Close() - it.subIt = newSub - } - return it, false -} - -// We're only as expensive as our subiterator. Except, we can't be nexted. -func (it *OptionalIterator) GetStats() *IteratorStats { - subStats := it.subIt.GetStats() - return &IteratorStats{ - CheckCost: subStats.CheckCost, - NextCost: int64(1 << 62), - Size: subStats.Size, - } -} diff --git a/graph/or_iterator.go b/graph/or_iterator.go deleted file mode 100644 index 6c1c8f2..0000000 --- a/graph/or_iterator.go +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines the or and short-circuiting or iterator. Or is the union operator for it's subiterators. -// Short-circuiting-or is a little different. It will return values from the first iterator that returns -// values at all, and then stops. -// -// Never reorders the iterators from the order they arrive. It is either the union or the first one. -// May return the same value twice -- once for each branch. - -import ( - "fmt" - "strings" -) - -type OrIterator struct { - BaseIterator - isShortCircuiting bool - internalIterators []Iterator - itCount int - currentIterator int -} - -func NewOrIterator() *OrIterator { - var or OrIterator - BaseIteratorInit(&or.BaseIterator) - or.internalIterators = make([]Iterator, 0, 20) - or.isShortCircuiting = false - or.currentIterator = -1 - return &or -} - -func NewShortCircuitOrIterator() *OrIterator { - var or OrIterator - BaseIteratorInit(&or.BaseIterator) - or.internalIterators = make([]Iterator, 0, 20) - or.isShortCircuiting = true - or.currentIterator = -1 - return &or -} - -// Reset all internal iterators -func (it *OrIterator) Reset() { - for _, sub := range it.internalIterators { - sub.Reset() - } - it.currentIterator = -1 -} - -func (it *OrIterator) Clone() Iterator { - var or *OrIterator - if it.isShortCircuiting { - or = NewShortCircuitOrIterator() - } else { - or = NewOrIterator() - } - for _, sub := range it.internalIterators { - or.AddSubIterator(sub.Clone()) - } - it.CopyTagsFrom(it) - return or -} - -// Returns a list.List of the subiterators, in order. 
The returned slice must not be modified. -func (it *OrIterator) GetSubIterators() []Iterator { - return it.internalIterators -} - -// Overrides BaseIterator TagResults, as it needs to add it's own results and -// recurse down it's subiterators. -func (it *OrIterator) TagResults(out *map[string]TSVal) { - it.BaseIterator.TagResults(out) - it.internalIterators[it.currentIterator].TagResults(out) -} - -// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. -func (it *OrIterator) GetResultTree() *ResultTree { - tree := NewResultTree(it.LastResult()) - for _, sub := range it.internalIterators { - tree.AddSubtree(sub.GetResultTree()) - } - return tree -} - -// Prints information about this iterator. -func (it *OrIterator) DebugString(indent int) string { - var total string - for i, sub := range it.internalIterators { - total += strings.Repeat(" ", indent+2) - total += fmt.Sprintf("%d:\n%s\n", i, sub.DebugString(indent+4)) - } - var tags string - for _, k := range it.Tags() { - tags += fmt.Sprintf("%s;", k) - } - spaces := strings.Repeat(" ", indent+2) - - return fmt.Sprintf("%s(%s\n%stags:%s\n%sits:\n%s)", - strings.Repeat(" ", indent), - it.Type(), - spaces, - tags, - spaces, - total) -} - -// Add a subiterator to this Or iterator. Order matters. -func (it *OrIterator) AddSubIterator(sub Iterator) { - it.internalIterators = append(it.internalIterators, sub) - it.itCount++ -} - -// Returns the Next value from the Or iterator. Because the Or is the -// union of its subiterators, it must produce from all subiterators -- unless -// it's shortcircuiting, in which case, it's the first one that returns anything. 
-func (it *OrIterator) Next() (TSVal, bool) { - NextLogIn(it) - var curr TSVal - var exists bool - firstTime := false - for { - if it.currentIterator == -1 { - it.currentIterator = 0 - firstTime = true - } - curIt := it.internalIterators[it.currentIterator] - curr, exists = curIt.Next() - if !exists { - if it.isShortCircuiting && !firstTime { - return NextLogOut(it, nil, false) - } - it.currentIterator++ - if it.currentIterator == it.itCount { - return NextLogOut(it, nil, false) - } - } else { - it.Last = curr - return NextLogOut(it, curr, true) - } - } - panic("Somehow broke out of Next() loop in OrIterator") -} - -// Checks a value against the iterators, in order. -func (it *OrIterator) checkSubIts(val TSVal) bool { - var subIsGood = false - for i, sub := range it.internalIterators { - subIsGood = sub.Check(val) - if subIsGood { - it.currentIterator = i - break - } - } - return subIsGood -} - -// Check a value against the entire iterator, in order. -func (it *OrIterator) Check(val TSVal) bool { - CheckLogIn(it, val) - anyGood := it.checkSubIts(val) - if !anyGood { - return CheckLogOut(it, val, false) - } - it.Last = val - return CheckLogOut(it, val, true) -} - -// Returns the approximate size of the Or iterator. Because we're dealing -// with a union, we know that the largest we can be is the sum of all the iterators, -// or in the case of short-circuiting, the longest. -func (it *OrIterator) Size() (int64, bool) { - var val int64 - var b bool - if it.isShortCircuiting { - val = 0 - b = true - for _, sub := range it.internalIterators { - newval, newb := sub.Size() - if val < newval { - val = newval - } - b = newb && b - } - } else { - val = 0 - b = true - for _, sub := range it.internalIterators { - newval, newb := sub.Size() - val += newval - b = newb && b - } - } - return val, b -} - -// An Or has no NextResult of its own -- that is, there are no other values -// which satisfy our previous result that are not the result itself. 
Our -// subiterators might, however, so just pass the call recursively. In the case of -// shortcircuiting, only allow new results from the currently checked iterator -func (it *OrIterator) NextResult() bool { - if it.currentIterator != -1 { - return it.internalIterators[it.currentIterator].NextResult() - } - return false -} - -// Perform or-specific cleanup, of which there currently is none. -func (it *OrIterator) cleanUp() {} - -// Close this iterator, and, by extension, close the subiterators. -// Close should be idempotent, and it follows that if it's subiterators -// follow this contract, the And follows the contract. -func (it *OrIterator) Close() { - it.cleanUp() - for _, sub := range it.internalIterators { - sub.Close() - } -} - -func (it *OrIterator) Optimize() (Iterator, bool) { - old := it.GetSubIterators() - optIts := optimizeSubIterators(old) - // Close the replaced iterators (they ought to close themselves, but Close() - // is idempotent, so this just protects against any machinations). - closeIteratorList(old, nil) - newOr := NewOrIterator() - newOr.isShortCircuiting = it.isShortCircuiting - - // Add the subiterators in order. - for _, o := range optIts { - newOr.AddSubIterator(o) - } - - // Move the tags hanging on us (like any good replacement). - newOr.CopyTagsFrom(it) - - // And close ourselves but not our subiterators -- some may still be alive in - // the new And (they were unchanged upon calling Optimize() on them, at the - // start). 
- it.cleanUp() - return newOr, true -} - -func (it *OrIterator) GetStats() *IteratorStats { - CheckCost := int64(0) - NextCost := int64(0) - Size := int64(0) - for _, sub := range it.internalIterators { - stats := sub.GetStats() - NextCost += stats.NextCost - CheckCost += stats.CheckCost - if it.isShortCircuiting { - if Size < stats.Size { - Size = stats.Size - } - } else { - Size += stats.Size - } - } - return &IteratorStats{ - CheckCost: CheckCost, - NextCost: NextCost, - Size: Size, - } - -} - -// Register this as an "or" iterator. -func (it *OrIterator) Type() string { return "or" } diff --git a/graph/or_iterator_test.go b/graph/or_iterator_test.go deleted file mode 100644 index 9450094..0000000 --- a/graph/or_iterator_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - . 
"github.com/smartystreets/goconvey/convey" - "testing" -) - -func extractNumbersFromIterator(it Iterator) []int { - var outputNumbers []int - for { - val, ok := it.Next() - if !ok { - break - } - outputNumbers = append(outputNumbers, val.(int)) - } - return outputNumbers -} - -func TestOrIteratorBasics(t *testing.T) { - var orIt *OrIterator - - Convey("Given an Or Iterator of two fixed iterators", t, func() { - orIt = NewOrIterator() - fixed1 := newFixedIterator() - fixed1.AddValue(1) - fixed1.AddValue(2) - fixed1.AddValue(3) - fixed2 := newFixedIterator() - fixed2.AddValue(3) - fixed2.AddValue(9) - fixed2.AddValue(20) - fixed2.AddValue(21) - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - - Convey("It should guess its size.", func() { - v, _ := orIt.Size() - So(v, ShouldEqual, 7) - }) - - Convey("It should extract all the numbers, potentially twice.", func() { - allNumbers := []int{1, 2, 3, 3, 9, 20, 21} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - Convey("It should check that numbers in either iterator exist.", func() { - So(orIt.Check(2), ShouldEqual, true) - So(orIt.Check(3), ShouldEqual, true) - So(orIt.Check(21), ShouldEqual, true) - }) - - Convey("It should check that numbers not in either iterator are false.", func() { - So(orIt.Check(22), ShouldEqual, false) - So(orIt.Check(5), ShouldEqual, false) - So(orIt.Check(0), ShouldEqual, false) - }) - - }) - -} - -func TestShortCircuitingOrBasics(t *testing.T) { - var orIt *OrIterator - - Convey("Given a short-circuiting Or of two fixed iterators", t, func() { - orIt = NewShortCircuitOrIterator() - fixed1 := newFixedIterator() - fixed1.AddValue(1) - fixed1.AddValue(2) - fixed1.AddValue(3) - fixed2 := newFixedIterator() - fixed2.AddValue(3) - fixed2.AddValue(9) - 
fixed2.AddValue(20) - fixed2.AddValue(21) - - Convey("It should guess its size.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - v, _ := orIt.Size() - So(v, ShouldEqual, 4) - }) - - Convey("It should extract the first iterators' numbers.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - allNumbers := []int{1, 2, 3} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - Convey("It should check that numbers in either iterator exist.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - So(orIt.Check(2), ShouldEqual, true) - So(orIt.Check(3), ShouldEqual, true) - So(orIt.Check(21), ShouldEqual, true) - So(orIt.Check(22), ShouldEqual, false) - So(orIt.Check(5), ShouldEqual, false) - So(orIt.Check(0), ShouldEqual, false) - - }) - - Convey("It should check that it pulls the second iterator's numbers if the first is empty.", func() { - orIt.AddSubIterator(newFixedIterator()) - orIt.AddSubIterator(fixed2) - allNumbers := []int{3, 9, 20, 21} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - }) - -} diff --git a/graph/query_shape.go b/graph/query_shape.go deleted file mode 100644 index d59a5c0..0000000 --- a/graph/query_shape.go +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -type Node struct { - Id int `json:"id"` - Tags []string `json:"tags,omitempty"` - Values []string `json:"values,omitempty"` - IsLinkNode bool `json:"is_link_node"` - IsFixed bool `json:"is_fixed"` -} - -type Link struct { - Source int `json:"source"` - Target int `json:"target"` - Pred int `json:"type"` - LinkNode int `json:"link_node"` -} - -type queryShape struct { - nodes []Node - links []Link - ts TripleStore - nodeId int - hasaIds []int - hasaDirs []Direction -} - -func OutputQueryShapeForIterator(it Iterator, ts TripleStore, outputMap *map[string]interface{}) { - qs := &queryShape{ - ts: ts, - nodeId: 1, - } - - node := qs.MakeNode(it.Clone()) - qs.AddNode(node) - (*outputMap)["nodes"] = qs.nodes - (*outputMap)["links"] = qs.links -} - -func (qs *queryShape) AddNode(n *Node) { - qs.nodes = append(qs.nodes, *n) -} - -func (qs *queryShape) AddLink(l *Link) { - qs.links = append(qs.links, *l) -} - -func (qs *queryShape) LastHasa() (int, Direction) { - return qs.hasaIds[len(qs.hasaIds)-1], qs.hasaDirs[len(qs.hasaDirs)-1] -} - -func (qs *queryShape) PushHasa(i int, d Direction) { - qs.hasaIds = append(qs.hasaIds, i) - qs.hasaDirs = append(qs.hasaDirs, d) -} - -func (qs *queryShape) RemoveHasa() { - qs.hasaIds = qs.hasaIds[:len(qs.hasaIds)-1] - qs.hasaDirs = qs.hasaDirs[:len(qs.hasaDirs)-1] -} - -func (qs *queryShape) StealNode(left *Node, right *Node) { - for _, v := range right.Values { - left.Values = append(left.Values, v) - } - for _, v := range right.Tags { - left.Tags = append(left.Tags, v) - } - left.IsLinkNode = 
left.IsLinkNode || right.IsLinkNode - left.IsFixed = left.IsFixed || right.IsFixed - for i, link := range qs.links { - rewrite := false - if link.LinkNode == right.Id { - link.LinkNode = left.Id - rewrite = true - } - if link.Source == right.Id { - link.Source = left.Id - rewrite = true - } - if link.Target == right.Id { - link.Target = left.Id - rewrite = true - } - if rewrite { - qs.links = append(append(qs.links[:i], qs.links[i+1:]...), link) - } - } -} - -func (qs *queryShape) MakeNode(it Iterator) *Node { - n := Node{Id: qs.nodeId} - for _, tag := range it.Tags() { - n.Tags = append(n.Tags, tag) - } - for k, _ := range it.FixedTags() { - n.Tags = append(n.Tags, k) - } - - switch it.Type() { - case "and": - for _, sub := range it.GetSubIterators() { - qs.nodeId++ - newNode := qs.MakeNode(sub) - if sub.Type() != "or" { - qs.StealNode(&n, newNode) - } else { - qs.AddNode(newNode) - qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) - } - } - case "fixed": - n.IsFixed = true - for { - val, more := it.Next() - if !more { - break - } - n.Values = append(n.Values, qs.ts.GetNameFor(val)) - } - case "hasa": - hasa := it.(*HasaIterator) - qs.PushHasa(n.Id, hasa.dir) - qs.nodeId++ - newNode := qs.MakeNode(hasa.primaryIt) - qs.AddNode(newNode) - qs.RemoveHasa() - case "or": - for _, sub := range it.GetSubIterators() { - qs.nodeId++ - newNode := qs.MakeNode(sub) - if sub.Type() == "or" { - qs.StealNode(&n, newNode) - } else { - qs.AddNode(newNode) - qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) - } - } - case "linksto": - n.IsLinkNode = true - lto := it.(*LinksToIterator) - qs.nodeId++ - newNode := qs.MakeNode(lto.primaryIt) - hasaID, hasaDir := qs.LastHasa() - if (hasaDir == Subject && lto.dir == Object) || - (hasaDir == Object && lto.dir == Subject) { - qs.AddNode(newNode) - if hasaDir == Subject { - qs.AddLink(&Link{hasaID, newNode.Id, 0, n.Id}) - } else { - qs.AddLink(&Link{newNode.Id, hasaID, 0, n.Id}) - } - } else if lto.primaryIt.Type() == "fixed" { - qs.StealNode(&n, newNode) - 
} else { - qs.AddNode(newNode) - } - case "optional": - // Unsupported, for the moment - fallthrough - case "all": - } - return &n -} diff --git a/graph/query_shape_test.go b/graph/query_shape_test.go deleted file mode 100644 index dc33fc3..0000000 --- a/graph/query_shape_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "testing" - - . "github.com/smartystreets/goconvey/convey" -) - -func buildHasaWithTag(ts TripleStore, tag string, target string) *HasaIterator { - fixed_obj := ts.MakeFixed() - fixed_pred := ts.MakeFixed() - fixed_obj.AddValue(ts.GetIdFor(target)) - fixed_pred.AddValue(ts.GetIdFor("status")) - fixed_obj.AddTag(tag) - lto1 := NewLinksToIterator(ts, fixed_obj, Object) - lto2 := NewLinksToIterator(ts, fixed_pred, Predicate) - and := NewAndIterator() - and.AddSubIterator(lto1) - and.AddSubIterator(lto2) - hasa := NewHasaIterator(ts, and, Subject) - return hasa -} - -func TestQueryShape(t *testing.T) { - var queryShape map[string]interface{} - var ts *TestTripleStore - ts = new(TestTripleStore) - ts.On("GetIdFor", "cool").Return(1) - ts.On("GetNameFor", 1).Return("cool") - ts.On("GetIdFor", "status").Return(2) - ts.On("GetNameFor", 2).Return("status") - ts.On("GetIdFor", "fun").Return(3) - ts.On("GetNameFor", 3).Return("fun") - ts.On("GetIdFor", "name").Return(4) - ts.On("GetNameFor", 4).Return("name") - - 
Convey("Given a single linkage iterator's shape", t, func() { - queryShape = make(map[string]interface{}) - hasa := buildHasaWithTag(ts, "tag", "cool") - hasa.AddTag("top") - OutputQueryShapeForIterator(hasa, ts, &queryShape) - - Convey("It should have three nodes and one link", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(len(nodes), ShouldEqual, 3) - So(len(links), ShouldEqual, 1) - }) - - Convey("These nodes should be correctly tagged", func() { - nodes := queryShape["nodes"].([]Node) - So(nodes[0].Tags, ShouldResemble, []string{"tag"}) - So(nodes[1].IsLinkNode, ShouldEqual, true) - So(nodes[2].Tags, ShouldResemble, []string{"top"}) - - }) - - Convey("The link should be correctly typed", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(links[0].Source, ShouldEqual, nodes[2].Id) - So(links[0].Target, ShouldEqual, nodes[0].Id) - So(links[0].LinkNode, ShouldEqual, nodes[1].Id) - So(links[0].Pred, ShouldEqual, 0) - - }) - - }) - - Convey("Given a name-of-an-and-iterator's shape", t, func() { - queryShape = make(map[string]interface{}) - hasa1 := buildHasaWithTag(ts, "tag1", "cool") - hasa1.AddTag("hasa1") - hasa2 := buildHasaWithTag(ts, "tag2", "fun") - hasa1.AddTag("hasa2") - andInternal := NewAndIterator() - andInternal.AddSubIterator(hasa1) - andInternal.AddSubIterator(hasa2) - fixed_pred := ts.MakeFixed() - fixed_pred.AddValue(ts.GetIdFor("name")) - lto1 := NewLinksToIterator(ts, andInternal, Subject) - lto2 := NewLinksToIterator(ts, fixed_pred, Predicate) - and := NewAndIterator() - and.AddSubIterator(lto1) - and.AddSubIterator(lto2) - hasa := NewHasaIterator(ts, and, Object) - OutputQueryShapeForIterator(hasa, ts, &queryShape) - - Convey("It should have seven nodes and three links", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(len(nodes), ShouldEqual, 7) - So(len(links), ShouldEqual, 3) - }) - - Convey("Three of the 
nodes are link nodes, four aren't", func() { - nodes := queryShape["nodes"].([]Node) - count := 0 - for _, node := range nodes { - if node.IsLinkNode { - count++ - } - } - So(count, ShouldEqual, 3) - }) - - Convey("These nodes should be correctly tagged", nil) - - }) - -} diff --git a/graph/result_tree_evaluator.go b/graph/result_tree_evaluator.go index ddfa1cb..7dcfc85 100644 --- a/graph/result_tree_evaluator.go +++ b/graph/result_tree_evaluator.go @@ -25,11 +25,11 @@ func NewResultTree(result TSVal) *ResultTree { return &ResultTree{result: result} } -func (t *ResultTree) ToString() string { +func (t *ResultTree) String() string { base := fmt.Sprintf("(%d", t.result) if len(t.subtrees) != 0 { for _, sub := range t.subtrees { - base += fmt.Sprintf(" %s", sub.ToString()) + base += fmt.Sprintf(" %s", sub) } } base += ")" @@ -48,11 +48,11 @@ func StringResultTreeEvaluator(it Iterator) string { if !ok { break } - out += it.GetResultTree().ToString() + out += it.GetResultTree().String() out += "\n" for it.NextResult() == true { out += " " - out += it.GetResultTree().ToString() + out += it.GetResultTree().String() out += "\n" } } diff --git a/graph/result_tree_evaluator_test.go b/graph/result_tree_evaluator_test.go index 349bc08..ffd9c31 100644 --- a/graph/result_tree_evaluator_test.go +++ b/graph/result_tree_evaluator_test.go @@ -12,14 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -package graph +package graph_test import ( "testing" + + . 
"github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func TestSingleIterator(t *testing.T) { - all := NewInt64AllIterator(1, 3) + all := iterator.NewInt64(1, 3) result := StringResultTreeEvaluator(all) expected := "(1)\n(2)\n(3)\n" if expected != result { @@ -28,9 +31,9 @@ func TestSingleIterator(t *testing.T) { } func TestAndIterator(t *testing.T) { - all1 := NewInt64AllIterator(1, 3) - all2 := NewInt64AllIterator(3, 5) - and := NewAndIterator() + all1 := iterator.NewInt64(1, 3) + all2 := iterator.NewInt64(3, 5) + and := iterator.NewAnd() and.AddSubIterator(all1) and.AddSubIterator(all2) diff --git a/graph/sexp/parser.go b/graph/sexp/parser.go index aeab8d6..9c3fca7 100644 --- a/graph/sexp/parser.go +++ b/graph/sexp/parser.go @@ -18,6 +18,7 @@ import ( "github.com/badgerodon/peg" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func BuildIteratorTreeForQuery(ts graph.TripleStore, query string) graph.Iterator { @@ -195,7 +196,7 @@ func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Ite if tree.Children[0].Children[0].Name == "ColonIdentifier" { n = nodeID[1:] } - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() fixed.AddValue(ts.GetIdFor(n)) out = fixed } @@ -207,11 +208,11 @@ func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Ite i++ } it := buildIteratorTree(tree.Children[i], ts) - lto := graph.NewLinksToIterator(ts, it, graph.Predicate) + lto := iterator.NewLinksTo(ts, it, graph.Predicate) return lto case "RootConstraint": constraintCount := 0 - and := graph.NewAndIterator() + and := iterator.NewAnd() for _, c := range tree.Children { switch c.Name { case "NodeIdentifier": @@ -227,10 +228,10 @@ func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Ite } return and case "Constraint": - var hasa *graph.HasaIterator + var hasa *iterator.HasA topLevelDir := graph.Subject subItDir := graph.Object - subAnd := graph.NewAndIterator() + 
subAnd := iterator.NewAnd() isOptional := false for _, c := range tree.Children { switch c.Name { @@ -251,21 +252,21 @@ func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Ite fallthrough case "RootConstraint": it := buildIteratorTree(c, ts) - l := graph.NewLinksToIterator(ts, it, subItDir) + l := iterator.NewLinksTo(ts, it, subItDir) subAnd.AddSubIterator(l) continue default: continue } } - hasa = graph.NewHasaIterator(ts, subAnd, topLevelDir) + hasa = iterator.NewHasA(ts, subAnd, topLevelDir) if isOptional { - optional := graph.NewOptionalIterator(hasa) + optional := iterator.NewOptional(hasa) return optional } return hasa default: - return &graph.NullIterator{} + return &iterator.Null{} } panic("Not reached") } diff --git a/graph/sexp/parser_test.go b/graph/sexp/parser_test.go index 1d9caa1..63a5a2d 100644 --- a/graph/sexp/parser_test.go +++ b/graph/sexp/parser_test.go @@ -40,7 +40,7 @@ func TestParseSexpWithMemstore(t *testing.T) { }) Convey("It should get a single triple linkage", func() { - ts.AddTriple(graph.MakeTriple("i", "can", "win", "")) + ts.AddTriple(&graph.Triple{"i", "can", "win", ""}) query := "($a (:can \"win\"))" So(len(query), ShouldEqual, 17) it := BuildIteratorTreeForQuery(ts, query) @@ -51,7 +51,7 @@ func TestParseSexpWithMemstore(t *testing.T) { }) Convey("It can get an internal linkage", func() { - ts.AddTriple(graph.MakeTriple("i", "can", "win", "")) + ts.AddTriple(&graph.Triple{"i", "can", "win", ""}) query := "(\"i\" (:can $a))" it := BuildIteratorTreeForQuery(ts, query) So(it.Type(), ShouldEqual, "and") @@ -65,8 +65,8 @@ func TestParseSexpWithMemstore(t *testing.T) { func TestTreeConstraintParse(t *testing.T) { ts := memstore.NewTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("food", "is", "good", "")) + ts.AddTriple(&graph.Triple{"i", "like", "food", ""}) + ts.AddTriple(&graph.Triple{"food", "is", "good", ""}) query := "(\"i\"\n" + "(:like\n" + "($a (:is 
:good))))" @@ -85,8 +85,8 @@ func TestTreeConstraintParse(t *testing.T) { func TestTreeConstraintTagParse(t *testing.T) { ts := memstore.NewTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("food", "is", "good", "")) + ts.AddTriple(&graph.Triple{"i", "like", "food", ""}) + ts.AddTriple(&graph.Triple{"food", "is", "good", ""}) query := "(\"i\"\n" + "(:like\n" + "($a (:is :good))))" @@ -105,9 +105,9 @@ func TestTreeConstraintTagParse(t *testing.T) { func TestMultipleConstraintParse(t *testing.T) { ts := memstore.NewTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("i", "like", "beer", "")) - ts.AddTriple(graph.MakeTriple("you", "like", "beer", "")) + ts.AddTriple(&graph.Triple{"i", "like", "food", ""}) + ts.AddTriple(&graph.Triple{"i", "like", "beer", ""}) + ts.AddTriple(&graph.Triple{"you", "like", "beer", ""}) query := "($a \n" + "(:like :beer)\n" + "(:like \"food\"))" diff --git a/graph/triple.go b/graph/triple.go index c0029c6..934302b 100644 --- a/graph/triple.go +++ b/graph/triple.go @@ -45,14 +45,6 @@ type Triple struct { Provenance string `json:"provenance,omitempty"` } -func NewTriple() *Triple { - return &Triple{} -} - -func MakeTriple(sub string, pred string, obj string, provenance string) *Triple { - return &Triple{sub, pred, obj, provenance} -} - // Direction specifies an edge's type. type Direction byte @@ -103,7 +95,7 @@ func (t *Triple) Equals(o *Triple) bool { } // Pretty-prints a triple. -func (t *Triple) ToString() string { +func (t *Triple) String() string { return fmt.Sprintf("%s -- %s -> %s\n", t.Subject, t.Predicate, t.Object) } diff --git a/graph/triplestore.go b/graph/triplestore.go index 759d507..8250988 100644 --- a/graph/triplestore.go +++ b/graph/triplestore.go @@ -69,8 +69,8 @@ type TripleStore interface { // Returns the number of triples currently stored. 
Size() int64 - // Creates a Fixed iterator which can compare TSVals - MakeFixed() *FixedIterator + // Creates a fixed iterator which can compare TSVals + FixedIterator() FixedIterator // Optimize an iterator in the context of the triple store. // Suppose we have a better index for the passed tree; this diff --git a/graph/value_comparison_iterator.go b/graph/value_comparison_iterator.go deleted file mode 100644 index 2224aab..0000000 --- a/graph/value_comparison_iterator.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// "Value Comparison" is a unary operator -- a filter across the values in the -// relevant subiterator. -// -// This is hugely useful for things like provenance, but value ranges in general -// come up from time to time. At *worst* we're as big as our underlying iterator. -// At best, we're the null iterator. -// -// This is ripe for backend-side optimization. If you can run a value iterator, -// from a sorted set -- some sort of value index, then go for it. -// -// In MQL terms, this is the [{"age>=": 21}] concept. - -import ( - "fmt" - "log" - "strconv" - "strings" -) - -type ComparisonOperator int - -const ( - kCompareLT ComparisonOperator = iota - kCompareLTE - kCompareGT - kCompareGTE - // Why no Equals? Because that's usually an AndIterator. 
-) - -type ValueComparisonIterator struct { - BaseIterator - subIt Iterator - op ComparisonOperator - comparisonValue interface{} - ts TripleStore -} - -func NewValueComparisonIterator( - subIt Iterator, - operator ComparisonOperator, - value interface{}, - ts TripleStore) *ValueComparisonIterator { - - var vc ValueComparisonIterator - BaseIteratorInit(&vc.BaseIterator) - vc.subIt = subIt - vc.op = operator - vc.comparisonValue = value - vc.ts = ts - return &vc -} - -// Here's the non-boilerplate part of the ValueComparison iterator. Given a value -// and our operator, determine whether or not we meet the requirement. -func (it *ValueComparisonIterator) doComparison(val TSVal) bool { - //TODO(barakmich): Implement string comparison. - nodeStr := it.ts.GetNameFor(val) - switch cVal := it.comparisonValue.(type) { - case int: - cInt := int64(cVal) - intVal, err := strconv.ParseInt(nodeStr, 10, 64) - if err != nil { - return false - } - return RunIntOp(intVal, it.op, cInt) - case int64: - intVal, err := strconv.ParseInt(nodeStr, 10, 64) - if err != nil { - return false - } - return RunIntOp(intVal, it.op, cVal) - default: - return true - } -} - -func (it *ValueComparisonIterator) Close() { - it.subIt.Close() -} - -func RunIntOp(a int64, op ComparisonOperator, b int64) bool { - switch op { - case kCompareLT: - return a < b - case kCompareLTE: - return a <= b - case kCompareGT: - return a > b - case kCompareGTE: - return a >= b - default: - log.Fatal("Unknown operator type") - return false - } -} - -func (it *ValueComparisonIterator) Reset() { - it.subIt.Reset() -} - -func (it *ValueComparisonIterator) Clone() Iterator { - out := NewValueComparisonIterator(it.subIt.Clone(), it.op, it.comparisonValue, it.ts) - out.CopyTagsFrom(it) - return out -} - -func (it *ValueComparisonIterator) Next() (TSVal, bool) { - var val TSVal - var ok bool - for { - val, ok = it.subIt.Next() - if !ok { - return nil, false - } - if it.doComparison(val) { - break - } - } - it.Last = val - 
return val, ok -} - -func (it *ValueComparisonIterator) NextResult() bool { - for { - hasNext := it.subIt.NextResult() - if !hasNext { - return false - } - if it.doComparison(it.subIt.LastResult()) { - return true - } - } - it.Last = it.subIt.LastResult() - return true -} - -func (it *ValueComparisonIterator) Check(val TSVal) bool { - if !it.doComparison(val) { - return false - } - return it.subIt.Check(val) -} - -// If we failed the check, then the subiterator should not contribute to the result -// set. Otherwise, go ahead and tag it. -func (it *ValueComparisonIterator) TagResults(out *map[string]TSVal) { - it.BaseIterator.TagResults(out) - it.subIt.TagResults(out) -} - -// Registers the value-comparison iterator. -func (it *ValueComparisonIterator) Type() string { return "value-comparison" } - -// Prints the value-comparison and its subiterator. -func (it *ValueComparisonIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s\n%s)", - strings.Repeat(" ", indent), - it.Type(), it.subIt.DebugString(indent+4)) -} - -// There's nothing to optimize, locally, for a value-comparison iterator. -// Replace the underlying iterator if need be. -// potentially replace it. -func (it *ValueComparisonIterator) Optimize() (Iterator, bool) { - newSub, changed := it.subIt.Optimize() - if changed { - it.subIt.Close() - it.subIt = newSub - } - return it, false -} - -// We're only as expensive as our subiterator. -// Again, optimized value comparison iterators should do better. -func (it *ValueComparisonIterator) GetStats() *IteratorStats { - return it.subIt.GetStats() -} diff --git a/graph/value_comparison_iterator_test.go b/graph/value_comparison_iterator_test.go deleted file mode 100644 index 23c795d..0000000 --- a/graph/value_comparison_iterator_test.go +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "testing" -) - -func SetupMockTripleStore(nameMap map[string]int) *TestTripleStore { - ts := new(TestTripleStore) - for k, v := range nameMap { - ts.On("GetIdFor", k).Return(v) - ts.On("GetNameFor", v).Return(k) - } - return ts -} - -func SimpleValueTripleStore() *TestTripleStore { - ts := SetupMockTripleStore(map[string]int{ - "0": 0, - "1": 1, - "2": 2, - "3": 3, - "4": 4, - "5": 5, - }) - return ts -} - -func BuildFixedIterator() *FixedIterator { - fixed := newFixedIterator() - fixed.AddValue(0) - fixed.AddValue(1) - fixed.AddValue(2) - fixed.AddValue(3) - fixed.AddValue(4) - return fixed -} - -func checkIteratorContains(ts TripleStore, it Iterator, expected []string, t *testing.T) { - var actual []string - actual = nil - for { - val, ok := it.Next() - if !ok { - break - } - actual = append(actual, ts.GetNameFor(val)) - } - actualSet := actual[:] - for _, a := range expected { - found := false - for j, b := range actualSet { - if a == b { - actualSet = append(actualSet[:j], actualSet[j+1:]...) 
- found = true - break - } - } - if !found { - t.Error("Couldn't find", a, "in actual output.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) - return - } - } - if len(actualSet) != 0 { - t.Error("Actual output has more than expected.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) - } -} - -func TestWorkingIntValueComparison(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareLT, int64(3), ts) - checkIteratorContains(ts, vc, []string{"0", "1", "2"}, t) -} - -func TestFailingIntValueComparison(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareLT, int64(0), ts) - checkIteratorContains(ts, vc, []string{}, t) -} - -func TestWorkingGT(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGT, int64(2), ts) - checkIteratorContains(ts, vc, []string{"3", "4"}, t) -} - -func TestWorkingGTE(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) - checkIteratorContains(ts, vc, []string{"2", "3", "4"}, t) -} - -func TestVCICheck(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) - if vc.Check(1) { - t.Error("1 is less than 2, should be GTE") - } - if !vc.Check(2) { - t.Error("2 is GTE 2") - } - if !vc.Check(3) { - t.Error("3 is GTE 2") - } - if vc.Check(5) { - t.Error("5 is not in the underlying iterator") - } -} diff --git a/http/write.go b/http/write.go index 20fb60d..959875c 100644 --- a/http/write.go +++ b/http/write.go @@ -16,7 +16,6 @@ package http import ( "encoding/json" - "errors" "fmt" "io/ioutil" "net/http" @@ -37,7 +36,7 @@ func ParseJsonToTripleList(jsonBody []byte) ([]*graph.Triple, error) { 
} for i, t := range tripleList { if !t.IsValid() { - return nil, errors.New(fmt.Sprintf("Invalid triple at index %d. %s", i, t.ToString())) + return nil, fmt.Errorf("Invalid triple at index %d. %s", i, t) } } return tripleList, nil diff --git a/nquads/nquads.go b/nquads/nquads.go index 296cd99..64b0d53 100644 --- a/nquads/nquads.go +++ b/nquads/nquads.go @@ -58,7 +58,7 @@ func Parse(str string) *graph.Triple { } str = skipWhitespace(remainder) if str != "" && str[0] == '.' { - return graph.MakeTriple(*sub, *pred, *obj, prov) + return &graph.Triple{*sub, *pred, *obj, prov} } return nil } diff --git a/query/gremlin/build_iterator.go b/query/gremlin/build_iterator.go index 6bdfac2..9c81666 100644 --- a/query/gremlin/build_iterator.go +++ b/query/gremlin/build_iterator.go @@ -21,6 +21,7 @@ import ( "github.com/robertkrimen/otto" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func getStrings(obj *otto.Object, field string) []string { @@ -40,9 +41,9 @@ func getStringArgs(obj *otto.Object) []string { return getStrings(obj, "string_a func buildIteratorTree(obj *otto.Object, ts graph.TripleStore) graph.Iterator { if !isVertexChain(obj) { - return graph.NewNullIterator() + return iterator.NewNull() } - return buildIteratorTreeHelper(obj, ts, graph.NewNullIterator()) + return buildIteratorTreeHelper(obj, ts, iterator.NewNull()) } func makeListOfStringsFromArrayValue(obj *otto.Object) []string { @@ -73,7 +74,7 @@ func buildIteratorFromValue(val otto.Value, ts graph.TripleStore) graph.Iterator thing, _ := val.Export() switch v := thing.(type) { case string: - it := ts.MakeFixed() + it := ts.FixedIterator() it.AddValue(ts.GetIdFor(v)) return it default: @@ -86,7 +87,7 @@ func buildIteratorFromValue(val otto.Value, ts graph.TripleStore) graph.Iterator case "Array": // Had better be an array of strings strings := makeListOfStringsFromArrayValue(val.Object()) - it := ts.MakeFixed() + it := ts.FixedIterator() for _, x := range strings { 
it.AddValue(ts.GetIdFor(x)) } @@ -98,13 +99,13 @@ func buildIteratorFromValue(val otto.Value, ts graph.TripleStore) graph.Iterator case "Date": fallthrough case "String": - it := ts.MakeFixed() + it := ts.FixedIterator() str, _ := val.ToString() it.AddValue(ts.GetIdFor(str)) return it default: glog.Errorln("Trying to handle unsupported Javascript value.") - return graph.NewNullIterator() + return iterator.NewNull() } } @@ -112,7 +113,7 @@ func buildInOutIterator(obj *otto.Object, ts graph.TripleStore, base graph.Itera argList, _ := obj.Get("_gremlin_values") if argList.Class() != "GoArray" { glog.Errorln("How is arglist not an array? Return nothing.", argList.Class()) - return graph.NewNullIterator() + return iterator.NewNull() } argArray := argList.Object() lengthVal, _ := argArray.Get("length") @@ -142,11 +143,11 @@ func buildInOutIterator(obj *otto.Object, ts graph.TripleStore, base graph.Itera if isReverse { in, out = out, in } - lto := graph.NewLinksToIterator(ts, base, in) - and := graph.NewAndIterator() - and.AddSubIterator(graph.NewLinksToIterator(ts, predicateNodeIterator, graph.Predicate)) + lto := iterator.NewLinksTo(ts, base, in) + and := iterator.NewAnd() + and.AddSubIterator(iterator.NewLinksTo(ts, predicateNodeIterator, graph.Predicate)) and.AddSubIterator(lto) - return graph.NewHasaIterator(ts, and, out) + return iterator.NewHasA(ts, and, out) } func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph.Iterator) graph.Iterator { @@ -169,7 +170,7 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. if len(stringArgs) == 0 { it = ts.GetNodesAllIterator() } else { - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() for _, name := range stringArgs { fixed.AddValue(ts.GetIdFor(name)) } @@ -183,58 +184,58 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. 
case "save": all := ts.GetNodesAllIterator() if len(stringArgs) > 2 || len(stringArgs) == 0 { - return graph.NewNullIterator() + return iterator.NewNull() } if len(stringArgs) == 2 { all.AddTag(stringArgs[1]) } else { all.AddTag(stringArgs[0]) } - predFixed := ts.MakeFixed() + predFixed := ts.FixedIterator() predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, graph.Predicate)) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, graph.Object)) - hasa := graph.NewHasaIterator(ts, subAnd, graph.Subject) - and := graph.NewAndIterator() + subAnd := iterator.NewAnd() + subAnd.AddSubIterator(iterator.NewLinksTo(ts, predFixed, graph.Predicate)) + subAnd.AddSubIterator(iterator.NewLinksTo(ts, all, graph.Object)) + hasa := iterator.NewHasA(ts, subAnd, graph.Subject) + and := iterator.NewAnd() and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and case "saver": all := ts.GetNodesAllIterator() if len(stringArgs) > 2 || len(stringArgs) == 0 { - return graph.NewNullIterator() + return iterator.NewNull() } if len(stringArgs) == 2 { all.AddTag(stringArgs[1]) } else { all.AddTag(stringArgs[0]) } - predFixed := ts.MakeFixed() + predFixed := ts.FixedIterator() predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, graph.Predicate)) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, graph.Subject)) - hasa := graph.NewHasaIterator(ts, subAnd, graph.Object) - and := graph.NewAndIterator() + subAnd := iterator.NewAnd() + subAnd.AddSubIterator(iterator.NewLinksTo(ts, predFixed, graph.Predicate)) + subAnd.AddSubIterator(iterator.NewLinksTo(ts, all, graph.Subject)) + hasa := iterator.NewHasA(ts, subAnd, graph.Object) + and := iterator.NewAnd() and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and case "has": - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() if len(stringArgs) < 2 
{ - return graph.NewNullIterator() + return iterator.NewNull() } for _, name := range stringArgs[1:] { fixed.AddValue(ts.GetIdFor(name)) } - predFixed := ts.MakeFixed() + predFixed := ts.FixedIterator() predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, graph.Predicate)) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, fixed, graph.Object)) - hasa := graph.NewHasaIterator(ts, subAnd, graph.Subject) - and := graph.NewAndIterator() + subAnd := iterator.NewAnd() + subAnd.AddSubIterator(iterator.NewLinksTo(ts, predFixed, graph.Predicate)) + subAnd.AddSubIterator(iterator.NewLinksTo(ts, fixed, graph.Object)) + hasa := iterator.NewHasA(ts, subAnd, graph.Subject) + and := iterator.NewAnd() and.AddSubIterator(hasa) and.AddSubIterator(subIt) it = and @@ -244,27 +245,27 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. arg, _ := obj.Get("_gremlin_values") firstArg, _ := arg.Object().Get("0") if !isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() + return iterator.NewNull() } argIt := buildIteratorTree(firstArg.Object(), ts) - and := graph.NewAndIterator() + and := iterator.NewAnd() and.AddSubIterator(subIt) and.AddSubIterator(argIt) it = and case "back": arg, _ := obj.Get("_gremlin_back_chain") argIt := buildIteratorTree(arg.Object(), ts) - and := graph.NewAndIterator() + and := iterator.NewAnd() and.AddSubIterator(subIt) and.AddSubIterator(argIt) it = and case "is": - fixed := ts.MakeFixed() + fixed := ts.FixedIterator() for _, name := range stringArgs { fixed.AddValue(ts.GetIdFor(name)) } - and := graph.NewAndIterator() + and := iterator.NewAnd() and.AddSubIterator(fixed) and.AddSubIterator(subIt) it = and @@ -272,11 +273,11 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. 
arg, _ := obj.Get("_gremlin_values") firstArg, _ := arg.Object().Get("0") if !isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() + return iterator.NewNull() } argIt := buildIteratorTree(firstArg.Object(), ts) - or := graph.NewOrIterator() + or := iterator.NewOr() or.AddSubIterator(subIt) or.AddSubIterator(argIt) it = or @@ -287,7 +288,7 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. it1 := buildInOutIterator(obj, ts, subIt, false) it2 := buildInOutIterator(obj, ts, clone, true) - or := graph.NewOrIterator() + or := iterator.NewOr() or.AddSubIterator(it1) or.AddSubIterator(it2) it = or @@ -298,14 +299,14 @@ func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph. arg, _ := obj.Get("_gremlin_values") firstArg, _ := arg.Object().Get("0") if isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() + return iterator.NewNull() } it = buildIteratorTreeHelper(firstArg.Object(), ts, subIt) case "followr": // Follow a morphism arg, _ := obj.Get("_gremlin_followr") if isVertexChain(arg.Object()) { - return graph.NewNullIterator() + return iterator.NewNull() } it = buildIteratorTreeHelper(arg.Object(), ts, subIt) case "in": diff --git a/query/gremlin/finals.go b/query/gremlin/finals.go index 57d3ba1..4babcd0 100644 --- a/query/gremlin/finals.go +++ b/query/gremlin/finals.go @@ -19,6 +19,7 @@ import ( "github.com/robertkrimen/otto" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) const TopResultTag = "id" @@ -238,7 +239,7 @@ func runIteratorWithCallback(it graph.Iterator, ses *Session, callback otto.Valu func runIteratorOnSession(it graph.Iterator, ses *Session) { if ses.lookingForQueryShape { - graph.OutputQueryShapeForIterator(it, ses.ts, &(ses.queryShape)) + iterator.OutputQueryShapeForIterator(it, ses.ts, &(ses.queryShape)) return } it, _ = it.Optimize() diff --git a/query/mql/build_iterator.go b/query/mql/build_iterator.go index f0a0b1b..6f4e9d3 100644 --- 
a/query/mql/build_iterator.go +++ b/query/mql/build_iterator.go @@ -22,10 +22,11 @@ import ( "strings" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) func (q *Query) buildFixed(s string) graph.Iterator { - f := q.ses.ts.MakeFixed() + f := q.ses.ts.FixedIterator() f.AddValue(q.ses.ts.GetIdFor(s)) return f } @@ -101,7 +102,7 @@ func (q *Query) buildIteratorTreeInternal(query interface{}, path Path) (it grap } func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path Path) (graph.Iterator, error) { - it := graph.NewAndIterator() + it := iterator.NewAnd() it.AddSubIterator(q.ses.ts.GetNodesAllIterator()) var err error err = nil @@ -135,24 +136,24 @@ func (q *Query) buildIteratorTreeMapInternal(query map[string]interface{}, path if err != nil { return nil, err } - subAnd := graph.NewAndIterator() - predFixed := q.ses.ts.MakeFixed() + subAnd := iterator.NewAnd() + predFixed := q.ses.ts.FixedIterator() predFixed.AddValue(q.ses.ts.GetIdFor(pred)) - subAnd.AddSubIterator(graph.NewLinksToIterator(q.ses.ts, predFixed, graph.Predicate)) + subAnd.AddSubIterator(iterator.NewLinksTo(q.ses.ts, predFixed, graph.Predicate)) if reverse { - lto := graph.NewLinksToIterator(q.ses.ts, builtIt, graph.Subject) + lto := iterator.NewLinksTo(q.ses.ts, builtIt, graph.Subject) subAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(q.ses.ts, subAnd, graph.Object) + hasa := iterator.NewHasA(q.ses.ts, subAnd, graph.Object) subit = hasa } else { - lto := graph.NewLinksToIterator(q.ses.ts, builtIt, graph.Object) + lto := iterator.NewLinksTo(q.ses.ts, builtIt, graph.Object) subAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(q.ses.ts, subAnd, graph.Subject) + hasa := iterator.NewHasA(q.ses.ts, subAnd, graph.Subject) subit = hasa } } if optional { - it.AddSubIterator(graph.NewOptionalIterator(subit)) + it.AddSubIterator(iterator.NewOptional(subit)) } else { it.AddSubIterator(subit) } diff --git a/query/mql/session.go 
b/query/mql/session.go index f148afb..059358e 100644 --- a/query/mql/session.go +++ b/query/mql/session.go @@ -22,6 +22,7 @@ import ( "github.com/barakmich/glog" "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" ) type Session struct { @@ -50,9 +51,9 @@ func (m *Session) GetQuery(input string, output_struct chan map[string]interface m.currentQuery = NewQuery(m) m.currentQuery.BuildIteratorTree(mqlQuery) output := make(map[string]interface{}) - graph.OutputQueryShapeForIterator(m.currentQuery.it, m.ts, &output) - nodes := output["nodes"].([]graph.Node) - new_nodes := make([]graph.Node, 0) + iterator.OutputQueryShapeForIterator(m.currentQuery.it, m.ts, &output) + nodes := output["nodes"].([]iterator.Node) + new_nodes := make([]iterator.Node, 0) for _, n := range nodes { n.Tags = nil new_nodes = append(new_nodes, n)