Move iterators into separate package

Also reduce API exposure and use standard library more - and fix bugs I previously introduces in mongo.
2014-06-30 22:22:50 +09:30 · 2014-06-30 22:22:50 +09:30 · 1768e593a8
commit 1768e593a8
parent 88be6bee37
62 changed files with 3240 additions and 3130 deletions
--- a/graph/iterator/hasa_iterator.go
+++ b/graph/iterator/hasa_iterator.go
@ -0,0 +1,223 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iterator
+
+// Defines one of the base iterators, the HasA iterator. The HasA takes a
+// subiterator of links, and acts as an iterator of nodes in the given
+// direction. The name comes from the idea that a "link HasA subject" or a "link
+// HasA predicate".
+//
+// HasA is weird in that it may return the same value twice if on the Next()
+// path. That's okay -- in reality, it can be viewed as returning the value for
+// a new triple, but to make logic much simpler, here we have the HasA.
+//
+// Likewise, it's important to think about Check()ing a HasA. When given a
+// value to check, it means "Check all predicates that have this value for your
+// direction against the subiterator." This would imply that there's more than
+// one possibility for the same Check()ed value. While we could return the
+// number of options, it's simpler to return one, and then call NextResult()
+// enough times to enumerate the options. (In fact, one could argue that the
+// raison d'etre for NextResult() is this iterator).
+//
+// Alternatively, can be seen as the dual of the LinksTo iterator.
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/barakmich/glog"
+
+	"github.com/google/cayley/graph"
+)
+
+// A HasA consists of a reference back to the graph.TripleStore that it references,
+// a primary subiterator, a direction in which the triples for that subiterator point,
+// and a temporary holder for the iterator generated on Check().
+type HasA struct {
+	Base
+	ts        graph.TripleStore
+	primaryIt graph.Iterator
+	dir       graph.Direction
+	resultIt  graph.Iterator
+}
+
+// Construct a new HasA iterator, given the triple subiterator, and the triple
+// direction for which it stands.
+func NewHasA(ts graph.TripleStore, subIt graph.Iterator, d graph.Direction) *HasA {
+	var hasa HasA
+	BaseInit(&hasa.Base)
+	hasa.ts = ts
+	hasa.primaryIt = subIt
+	hasa.dir = d
+	return &hasa
+}
+
+// Return our sole subiterator.
+func (it *HasA) GetSubIterators() []graph.Iterator {
+	return []graph.Iterator{it.primaryIt}
+}
+
+func (it *HasA) Reset() {
+	it.primaryIt.Reset()
+	if it.resultIt != nil {
+		it.resultIt.Close()
+	}
+}
+
+func (it *HasA) Clone() graph.Iterator {
+	out := NewHasA(it.ts, it.primaryIt.Clone(), it.dir)
+	out.CopyTagsFrom(it)
+	return out
+}
+
+// Direction accessor.
+func (it *HasA) Direction() graph.Direction { return it.dir }
+
+// Pass the Optimize() call along to the subiterator. If it becomes Null,
+// then the HasA becomes Null (there are no triples that have any directions).
+func (it *HasA) Optimize() (graph.Iterator, bool) {
+	newPrimary, changed := it.primaryIt.Optimize()
+	if changed {
+		it.primaryIt = newPrimary
+		if it.primaryIt.Type() == "null" {
+			return it.primaryIt, true
+		}
+	}
+	return it, false
+}
+
+// Pass the TagResults down the chain.
+func (it *HasA) TagResults(out *map[string]graph.TSVal) {
+	it.Base.TagResults(out)
+	it.primaryIt.TagResults(out)
+}
+
+// DEPRECATED Return results in a ResultTree.
+func (it *HasA) GetResultTree() *graph.ResultTree {
+	tree := graph.NewResultTree(it.LastResult())
+	tree.AddSubtree(it.primaryIt.GetResultTree())
+	return tree
+}
+
+// Print some information about this iterator.
+func (it *HasA) DebugString(indent int) string {
+	var tags string
+	for _, k := range it.Tags() {
+		tags += fmt.Sprintf("%s;", k)
+	}
+	return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", strings.Repeat(" ", indent), it.Type(), it.GetUid(), tags, it.dir, it.primaryIt.DebugString(indent+4))
+}
+
+// Check a value against our internal iterator. In order to do this, we must first open a new
+// iterator of "triples that have `val` in our direction", given to us by the triple store,
+// and then Next() values out of that iterator and Check() them against our subiterator.
+func (it *HasA) Check(val graph.TSVal) bool {
+	CheckLogIn(it, val)
+	if glog.V(4) {
+		glog.V(4).Infoln("Id is", it.ts.GetNameFor(val))
+	}
+	// TODO(barakmich): Optimize this
+	if it.resultIt != nil {
+		it.resultIt.Close()
+	}
+	it.resultIt = it.ts.GetTripleIterator(it.dir, val)
+	return CheckLogOut(it, val, it.GetCheckResult())
+}
+
+// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the
+// result iterator (a triple iterator based on the last checked value) and returns true if
+// another match is made.
+func (it *HasA) GetCheckResult() bool {
+	for {
+		linkVal, ok := it.resultIt.Next()
+		if !ok {
+			break
+		}
+		if glog.V(4) {
+			glog.V(4).Infoln("Triple is", it.ts.GetTriple(linkVal))
+		}
+		if it.primaryIt.Check(linkVal) {
+			it.Last = it.ts.GetTripleDirection(linkVal, it.dir)
+			return true
+		}
+	}
+	return false
+}
+
+// Get the next result that matches this branch.
+func (it *HasA) NextResult() bool {
+	// Order here is important. If the subiterator has a NextResult, then we
+	// need do nothing -- there is a next result, and we shouldn't move forward.
+	// However, we then need to get the next result from our last Check().
+	//
+	// The upshot is, the end of NextResult() bubbles up from the bottom of the
+	// iterator tree up, and we need to respect that.
+	if it.primaryIt.NextResult() {
+		return true
+	}
+	return it.GetCheckResult()
+}
+
+// Get the next result from this iterator. This is simpler than Check. We have a
+// subiterator we can get a value from, and we can take that resultant triple,
+// pull our direction out of it, and return that.
+func (it *HasA) Next() (graph.TSVal, bool) {
+	NextLogIn(it)
+	if it.resultIt != nil {
+		it.resultIt.Close()
+	}
+	it.resultIt = &Null{}
+
+	tID, ok := it.primaryIt.Next()
+	if !ok {
+		return NextLogOut(it, 0, false)
+	}
+	name := it.ts.GetTriple(tID).Get(it.dir)
+	val := it.ts.GetIdFor(name)
+	it.Last = val
+	return NextLogOut(it, val, true)
+}
+
+// GetStats() returns the statistics on the HasA iterator. This is curious. Next
+// cost is easy, it's an extra call or so on top of the subiterator Next cost.
+// CheckCost involves going to the graph.TripleStore, iterating out values, and hoping
+// one sticks -- potentially expensive, depending on fanout. Size, however, is
+// potentially smaller. we know at worst it's the size of the subiterator, but
+// if there are many repeated values, it could be much smaller in totality.
+func (it *HasA) GetStats() *graph.IteratorStats {
+	subitStats := it.primaryIt.GetStats()
+	// TODO(barakmich): These should really come from the triplestore itself
+	// and be optimized.
+	faninFactor := int64(1)
+	fanoutFactor := int64(30)
+	nextConstant := int64(2)
+	tripleConstant := int64(1)
+	return &graph.IteratorStats{
+		NextCost:  tripleConstant + subitStats.NextCost,
+		CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost,
+		Size:      faninFactor * subitStats.Size,
+	}
+}
+
+// Close the subiterator, the result iterator (if any) and the HasA.
+func (it *HasA) Close() {
+	if it.resultIt != nil {
+		it.resultIt.Close()
+	}
+	it.primaryIt.Close()
+}
+
+// Register this iterator as a HasA.
+func (it *HasA) Type() string { return "hasa" }