cayley/graph/hasa_iterator.go
kortschak 60d5c60817 Canonicalise iterator receiver names
This apparently meaningless churn improves godoc readability.
2014-06-28 21:36:50 +09:30

224 lines
7.4 KiB
Go

// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package graph
// Defines one of the base iterators, the HasA iterator. The HasA takes a
// subiterator of links, and acts as an iterator of nodes in the given
// direction. The name comes from the idea that a "link HasA subject" or a "link
// HasA predicate".
//
// HasA is weird in that it may return the same value twice if on the Next()
// path. That's okay -- in reality, it can be viewed as returning the value for
// a new triple, but to make logic much simpler, here we have the HasA.
//
// Likewise, it's important to think about Check()ing a HasA. When given a
// value to check, it means "Check all links that have this value for your
// direction against the subiterator." This would imply that there's more than
// one possibility for the same Check()ed value. While we could return the
// number of options, it's simpler to return one, and then call NextResult()
// enough times to enumerate the options. (In fact, one could argue that the
// raison d'etre for NextResult() is this iterator).
//
// Alternatively, HasA can be seen as the dual of the LinksTo iterator.
import (
"container/list"
"fmt"
"strings"
"github.com/barakmich/glog"
)
// A HasaIterator consists of a reference back to the TripleStore that it references,
// a primary subiterator, a direction in which the triples for that subiterator point,
// and a temporary holder for the iterator generated on Check().
type HasaIterator struct {
// BaseIterator is embedded; NewHasaIterator sets it up via BaseIteratorInit.
BaseIterator
// ts is the store asked for triples, names, IDs and triple iterators.
ts TripleStore
// primaryIt is the subiterator of links (triples) this iterator draws from.
primaryIt Iterator
// direction names which end of each link is handed back as a node.
direction string
// resultIt is nil until the first Check() or Next(). Check() points it at the
// store's triple iterator for the checked value; Next() swaps in a NullIterator.
resultIt Iterator
}
// NewHasaIterator builds a HasA iterator on top of the triple subiterator
// subIt. dir is the triple direction whose nodes the new iterator stands for,
// and ts is the store the iterator consults when resolving triples.
func NewHasaIterator(ts TripleStore, subIt Iterator, dir string) *HasaIterator {
	hasa := &HasaIterator{
		ts:        ts,
		primaryIt: subIt,
		direction: dir,
	}
	// The embedded base still needs its own initialisation.
	BaseIteratorInit(&hasa.BaseIterator)
	return hasa
}
// GetSubIterators returns a freshly allocated list.List whose only element is
// the primary subiterator.
func (it *HasaIterator) GetSubIterators() *list.List {
	subs := list.New()
	subs.PushBack(it.primaryIt)
	return subs
}
// Reset rewinds the primary subiterator and discards any result iterator left
// over from an earlier Check() or Next().
//
// The stale result iterator is closed and then replaced with an empty
// NullIterator (the same placeholder Next() installs), so that a NextResult()
// issued after Reset() draws from that placeholder rather than from an
// iterator that has already been closed.
func (it *HasaIterator) Reset() {
	it.primaryIt.Reset()
	if it.resultIt != nil {
		it.resultIt.Close()
		it.resultIt = &NullIterator{}
	}
}
// Clone returns a new HasA iterator over a clone of the primary subiterator,
// sharing this iterator's store and direction and carrying a copy of its tags.
// The result iterator from any previous Check() is not carried over.
func (it *HasaIterator) Clone() Iterator {
	sub := it.primaryIt.Clone()
	dup := NewHasaIterator(it.ts, sub, it.direction)
	dup.CopyTagsFrom(it)
	return dup
}
// Direction reports the triple direction this iterator extracts nodes from.
func (it *HasaIterator) Direction() string {
	return it.direction
}
// Optimize forwards the Optimize() call to the subiterator and adopts whatever
// replacement it offers. If that replacement is a null iterator, the HasA is
// itself replaced by it (there are no triples that have any directions) and
// true is returned; in every other case the HasA returns itself and false.
func (it *HasaIterator) Optimize() (Iterator, bool) {
	sub, changed := it.primaryIt.Optimize()
	if !changed {
		return it, false
	}
	it.primaryIt = sub
	if sub.Type() == "null" {
		return sub, true
	}
	return it, false
}
// TagResults records this iterator's own tags into out via the embedded
// BaseIterator and then hands out on to the primary subiterator so it can add
// its tags too.
func (it *HasaIterator) TagResults(out *map[string]TSVal) {
	// Own tags first, subiterator second; the order is kept as-is.
	it.BaseIterator.TagResults(out)
	sub := it.primaryIt
	sub.TagResults(out)
}
// GetResultTree returns a ResultTree rooted at this iterator's last result,
// with the primary subiterator's result tree attached as a subtree.
//
// Deprecated: returning results as a ResultTree is deprecated.
func (it *HasaIterator) GetResultTree() *ResultTree {
	root := NewResultTree(it.LastResult())
	child := it.primaryIt.GetResultTree()
	root.AddSubtree(child)
	return root
}
// DebugString renders this iterator as a parenthesised, indented description:
// its type, UID, tags (each followed by ';') and direction on the first line,
// then the subiterator's own DebugString indented four columns further.
func (it *HasaIterator) DebugString(indent int) string {
	tags := ""
	for _, tag := range it.Tags() {
		tags = fmt.Sprintf("%s%s;", tags, tag)
	}

	// Gather the pieces in the same order the format string consumes them.
	pad := strings.Repeat(" ", indent)
	kind := it.Type()
	uid := it.GetUid()
	sub := it.primaryIt.DebugString(indent + 4)

	return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", pad, kind, uid, tags, it.direction, sub)
}
// Check tests val against the subiterator. To do so it asks the triple store
// for an iterator of "triples that have `val` in our direction", installs it
// as the result iterator (closing any previous one), and then lets
// GetCheckResult() pull triples from it until one passes the subiterator's
// Check().
func (it *HasaIterator) Check(val TSVal) bool {
	CheckLogIn(it, val)
	if glog.V(4) {
		glog.V(4).Infoln("Id is", it.ts.GetNameFor(val))
	}
	// TODO(barakmich): Optimize this
	if stale := it.resultIt; stale != nil {
		stale.Close()
	}
	it.resultIt = it.ts.GetTripleIterator(it.direction, val)

	matched := it.GetCheckResult()
	return CheckLogOut(it, val, matched)
}
// GetCheckResult is shared code between Check() and NextResult(). It advances
// the result iterator (a triple iterator based on the last checked value)
// until a triple passes the subiterator's Check(), records the node in our
// direction of that triple as the last result, and returns true. It returns
// false once the result iterator is exhausted.
//
// If neither Check() nor Next() has run yet there is no result iterator at
// all; that case returns false rather than dereferencing a nil iterator.
func (it *HasaIterator) GetCheckResult() bool {
	if it.resultIt == nil {
		return false
	}
	for {
		linkVal, ok := it.resultIt.Next()
		if !ok {
			return false
		}
		if glog.V(4) {
			glog.V(4).Infoln("Triple is", it.ts.GetTriple(linkVal).ToString())
		}
		if it.primaryIt.Check(linkVal) {
			it.Last = it.ts.GetTripleDirection(linkVal, it.direction)
			return true
		}
	}
}
// NextResult moves to the next result that matches this branch and reports
// whether there was one.
func (it *HasaIterator) NextResult() bool {
	// Order here is important. The subiterator is asked first: if it still has
	// a NextResult, there is a next result and we must not move forward
	// ourselves. Only once it is out do we pull the next match from our last
	// Check().
	//
	// The upshot is that the end of NextResult() bubbles up from the bottom of
	// the iterator tree, and the short-circuit below respects that.
	return it.primaryIt.NextResult() || it.GetCheckResult()
}
// Next returns the next value from this iterator. This is simpler than Check:
// the subiterator yields a triple, and the node in our direction of that
// triple is the result. The second return value is false once the subiterator
// is exhausted.
//
// Any result iterator left over from a previous Check() is closed and replaced
// with an empty NullIterator, so a following NextResult() finds nothing
// further on the Check() side.
func (it *HasaIterator) Next() (TSVal, bool) {
	NextLogIn(it)
	if it.resultIt != nil {
		it.resultIt.Close()
	}
	it.resultIt = &NullIterator{}

	tID, ok := it.primaryIt.Next()
	if !ok {
		return NextLogOut(it, 0, false)
	}
	// Resolve the node the same way GetCheckResult() does, so both paths hand
	// back values produced by the same store call, without materialising the
	// triple and looking its name up again.
	val := it.ts.GetTripleDirection(tID, it.direction)
	it.Last = val
	return NextLogOut(it, val, true)
}
// GetStats returns cost estimates for the HasA iterator, derived from the
// subiterator's statistics. This is curious. Next cost is easy: a small
// constant on top of the subiterator's Next cost. Check cost involves going to
// the TripleStore, iterating out values, and hoping one sticks -- potentially
// expensive, depending on fanout -- so it is the subiterator's Check cost
// scaled by a fanout estimate. Size is at worst the size of the subiterator,
// though with many repeated values it could be much smaller in totality.
func (it *HasaIterator) GetStats() *IteratorStats {
	// TODO(barakmich): These should really come from the triplestore itself
	// and be optimized.
	const (
		faninFactor    int64 = 1
		fanoutFactor   int64 = 30
		nextConstant   int64 = 2
		tripleConstant int64 = 1
	)

	sub := it.primaryIt.GetStats()

	stats := new(IteratorStats)
	stats.NextCost = tripleConstant + sub.NextCost
	stats.CheckCost = (fanoutFactor * nextConstant) * sub.CheckCost
	stats.Size = faninFactor * sub.Size
	return stats
}
// Close closes the result iterator, if one exists, and then the primary
// subiterator.
func (it *HasaIterator) Close() {
	if pending := it.resultIt; pending != nil {
		pending.Close()
	}
	it.primaryIt.Close()
}
// Type identifies this iterator as a HasA by returning the name "hasa".
func (it *HasaIterator) Type() string {
	return "hasa"
}