282 lines
8.3 KiB
Go
282 lines
8.3 KiB
Go
// Copyright 2014 The Cayley Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package iterator
|
|
|
|
// Defines one of the base iterators, the HasA iterator. The HasA takes a
|
|
// subiterator of links, and acts as an iterator of nodes in the given
|
|
// direction. The name comes from the idea that a "link HasA subject" or a "link
|
|
// HasA predicate".
|
|
//
|
|
// HasA is weird in that it may return the same value twice if on the Next()
|
|
// path. That's okay -- in reality, it can be viewed as returning the value for
|
|
// a new quad, but to make logic much simpler, here we have the HasA.
|
|
//
|
|
// Likewise, it's important to think about Contains()ing a HasA. When given a
|
|
// value to check, it means "Check all predicates that have this value for your
|
|
// direction against the subiterator." This would imply that there's more than
|
|
// one possibility for the same Contains()ed value. While we could return the
|
|
// number of options, it's simpler to return one, and then call NextPath()
|
|
// enough times to enumerate the options. (In fact, one could argue that the
|
|
// raison d'etre for NextPath() is this iterator).
|
|
//
|
|
// Alternatively, can be seen as the dual of the LinksTo iterator.
|
|
|
|
import (
|
|
"github.com/barakmich/glog"
|
|
|
|
"github.com/google/cayley/graph"
|
|
"github.com/google/cayley/quad"
|
|
)
|
|
|
|
// A HasA consists of a reference back to the graph.QuadStore that it references,
|
|
// a primary subiterator, a direction in which the quads for that subiterator point,
|
|
// and a temporary holder for the iterator generated on Contains().
|
|
type HasA struct {
|
|
uid uint64
|
|
tags graph.Tagger
|
|
qs graph.QuadStore
|
|
primaryIt graph.Iterator
|
|
dir quad.Direction
|
|
resultIt graph.Iterator
|
|
result graph.Value
|
|
runstats graph.IteratorStats
|
|
err error
|
|
}
|
|
|
|
// Construct a new HasA iterator, given the quad subiterator, and the quad
|
|
// direction for which it stands.
|
|
func NewHasA(qs graph.QuadStore, subIt graph.Iterator, d quad.Direction) *HasA {
|
|
return &HasA{
|
|
uid: NextUID(),
|
|
qs: qs,
|
|
primaryIt: subIt,
|
|
dir: d,
|
|
}
|
|
}
|
|
|
|
func (it *HasA) UID() uint64 {
|
|
return it.uid
|
|
}
|
|
|
|
// Return our sole subiterator.
|
|
func (it *HasA) SubIterators() []graph.Iterator {
|
|
return []graph.Iterator{it.primaryIt}
|
|
}
|
|
|
|
func (it *HasA) Reset() {
|
|
it.primaryIt.Reset()
|
|
if it.resultIt != nil {
|
|
it.resultIt.Close()
|
|
}
|
|
}
|
|
|
|
func (it *HasA) Tagger() *graph.Tagger {
|
|
return &it.tags
|
|
}
|
|
|
|
func (it *HasA) Clone() graph.Iterator {
|
|
out := NewHasA(it.qs, it.primaryIt.Clone(), it.dir)
|
|
out.tags.CopyFrom(it)
|
|
return out
|
|
}
|
|
|
|
// Direction accessor.
|
|
func (it *HasA) Direction() quad.Direction { return it.dir }
|
|
|
|
// Pass the Optimize() call along to the subiterator. If it becomes Null,
|
|
// then the HasA becomes Null (there are no quads that have any directions).
|
|
func (it *HasA) Optimize() (graph.Iterator, bool) {
|
|
newPrimary, changed := it.primaryIt.Optimize()
|
|
if changed {
|
|
it.primaryIt = newPrimary
|
|
if it.primaryIt.Type() == graph.Null {
|
|
return it.primaryIt, true
|
|
}
|
|
}
|
|
return it, false
|
|
}
|
|
|
|
// Pass the TagResults down the chain.
|
|
func (it *HasA) TagResults(dst map[string]graph.Value) {
|
|
for _, tag := range it.tags.Tags() {
|
|
dst[tag] = it.Result()
|
|
}
|
|
|
|
for tag, value := range it.tags.Fixed() {
|
|
dst[tag] = value
|
|
}
|
|
|
|
it.primaryIt.TagResults(dst)
|
|
}
|
|
|
|
// DEPRECATED Return results in a ResultTree.
|
|
func (it *HasA) ResultTree() *graph.ResultTree {
|
|
tree := graph.NewResultTree(it.Result())
|
|
tree.AddSubtree(it.primaryIt.ResultTree())
|
|
return tree
|
|
}
|
|
|
|
func (it *HasA) Describe() graph.Description {
|
|
primary := it.primaryIt.Describe()
|
|
return graph.Description{
|
|
UID: it.UID(),
|
|
Type: it.Type(),
|
|
Tags: it.tags.Tags(),
|
|
Direction: it.dir,
|
|
Iterator: &primary,
|
|
}
|
|
}
|
|
|
|
// Check a value against our internal iterator. In order to do this, we must first open a new
|
|
// iterator of "quads that have `val` in our direction", given to us by the quad store,
|
|
// and then Next() values out of that iterator and Contains() them against our subiterator.
|
|
func (it *HasA) Contains(val graph.Value) bool {
|
|
graph.ContainsLogIn(it, val)
|
|
it.runstats.Contains += 1
|
|
if glog.V(4) {
|
|
glog.V(4).Infoln("Id is", it.qs.NameOf(val))
|
|
}
|
|
// TODO(barakmich): Optimize this
|
|
if it.resultIt != nil {
|
|
it.resultIt.Close()
|
|
}
|
|
it.resultIt = it.qs.QuadIterator(it.dir, val)
|
|
ok := it.NextContains()
|
|
if it.err != nil {
|
|
return false
|
|
}
|
|
return graph.ContainsLogOut(it, val, ok)
|
|
}
|
|
|
|
// NextContains() is shared code between Contains() and GetNextResult() -- calls next on the
|
|
// result iterator (a quad iterator based on the last checked value) and returns true if
|
|
// another match is made.
|
|
func (it *HasA) NextContains() bool {
|
|
for graph.Next(it.resultIt) {
|
|
it.runstats.ContainsNext += 1
|
|
link := it.resultIt.Result()
|
|
if glog.V(4) {
|
|
glog.V(4).Infoln("Quad is", it.qs.Quad(link))
|
|
}
|
|
if it.primaryIt.Contains(link) {
|
|
it.result = it.qs.QuadDirection(link, it.dir)
|
|
return true
|
|
}
|
|
}
|
|
it.err = it.resultIt.Err()
|
|
return false
|
|
}
|
|
|
|
// Get the next result that matches this branch.
|
|
func (it *HasA) NextPath() bool {
|
|
// Order here is important. If the subiterator has a NextPath, then we
|
|
// need do nothing -- there is a next result, and we shouldn't move forward.
|
|
// However, we then need to get the next result from our last Contains().
|
|
//
|
|
// The upshot is, the end of NextPath() bubbles up from the bottom of the
|
|
// iterator tree up, and we need to respect that.
|
|
glog.V(4).Infoln("HASA", it.UID(), "NextPath")
|
|
if it.primaryIt.NextPath() {
|
|
return true
|
|
}
|
|
it.err = it.primaryIt.Err()
|
|
if it.err != nil {
|
|
return false
|
|
}
|
|
|
|
result := it.NextContains() // Sets it.err if there's an error
|
|
if it.err != nil {
|
|
return false
|
|
}
|
|
glog.V(4).Infoln("HASA", it.UID(), "NextPath Returns", result, "")
|
|
return result
|
|
}
|
|
|
|
// Next advances the iterator. This is simpler than Contains. We have a
|
|
// subiterator we can get a value from, and we can take that resultant quad,
|
|
// pull our direction out of it, and return that.
|
|
func (it *HasA) Next() bool {
|
|
graph.NextLogIn(it)
|
|
it.runstats.Next += 1
|
|
if it.resultIt != nil {
|
|
it.resultIt.Close()
|
|
}
|
|
it.resultIt = &Null{}
|
|
|
|
if !graph.Next(it.primaryIt) {
|
|
it.err = it.primaryIt.Err()
|
|
return graph.NextLogOut(it, 0, false)
|
|
}
|
|
tID := it.primaryIt.Result()
|
|
val := it.qs.QuadDirection(tID, it.dir)
|
|
it.result = val
|
|
return graph.NextLogOut(it, val, true)
|
|
}
|
|
|
|
func (it *HasA) Err() error {
|
|
return it.err
|
|
}
|
|
|
|
func (it *HasA) Result() graph.Value {
|
|
return it.result
|
|
}
|
|
|
|
// GetStats() returns the statistics on the HasA iterator. This is curious. Next
|
|
// cost is easy, it's an extra call or so on top of the subiterator Next cost.
|
|
// ContainsCost involves going to the graph.QuadStore, iterating out values, and hoping
|
|
// one sticks -- potentially expensive, depending on fanout. Size, however, is
|
|
// potentially smaller. we know at worst it's the size of the subiterator, but
|
|
// if there are many repeated values, it could be much smaller in totality.
|
|
func (it *HasA) Stats() graph.IteratorStats {
|
|
subitStats := it.primaryIt.Stats()
|
|
// TODO(barakmich): These should really come from the quadstore itself
|
|
// and be optimized.
|
|
faninFactor := int64(1)
|
|
fanoutFactor := int64(30)
|
|
nextConstant := int64(2)
|
|
quadConstant := int64(1)
|
|
return graph.IteratorStats{
|
|
NextCost: quadConstant + subitStats.NextCost,
|
|
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
|
|
Size: faninFactor * subitStats.Size,
|
|
Next: it.runstats.Next,
|
|
Contains: it.runstats.Contains,
|
|
ContainsNext: it.runstats.ContainsNext,
|
|
}
|
|
}
|
|
|
|
// Close the subiterator, the result iterator (if any) and the HasA. It closes
|
|
// all subiterators it can, but returns the first error it encounters.
|
|
func (it *HasA) Close() error {
|
|
err := it.primaryIt.Close()
|
|
|
|
if it.resultIt != nil {
|
|
err2 := it.resultIt.Close()
|
|
if err == nil {
|
|
err = err2
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// Register this iterator as a HasA.
|
|
func (it *HasA) Type() graph.Type { return graph.HasA }
|
|
|
|
func (it *HasA) Size() (int64, bool) {
|
|
return it.Stats().Size, false
|
|
}
|
|
|
|
var _ graph.Nexter = &HasA{}
|