Rename Check-ish -> Contains-ish
Contains[*] indicates what the check is for. [*] I considered Has/Have, but settled on Contains to avoid confusion with the HasA iterator.
This commit is contained in:
parent
a81005ba21
commit
1606e98d9f
20 changed files with 142 additions and 143 deletions
2
TODO.md
2
TODO.md
|
|
@ -52,7 +52,7 @@ An important failure of MQL before was that it was never well-specified. Let's n
|
|||
### New Iterators
|
||||
|
||||
#### Limit Iterator
|
||||
The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Check(), and a limit on NextResult()
|
||||
The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Contains(), and a limit on NextResult()
|
||||
|
||||
## Medium Term
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ var benchmarkQueries = []struct {
|
|||
|
||||
// This is more of an optimization problem that will get better over time. This takes a lot
|
||||
// of wrong turns on the walk down to what is ultimately the name, but top AND has it easy
|
||||
// as it has a fixed ID. Exercises Check().
|
||||
// as it has a fixed ID. Exercises Contains().
|
||||
{
|
||||
message: "the helpless checker",
|
||||
long: true,
|
||||
|
|
@ -383,7 +383,7 @@ func BenchmarkVeryLargeSetsSmallIntersection(b *testing.B) {
|
|||
runBench(2, b)
|
||||
}
|
||||
|
||||
func BenchmarkHelplessChecker(b *testing.B) {
|
||||
func BenchmarkHelplessContainsChecker(b *testing.B) {
|
||||
runBench(3, b)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,9 +89,8 @@ type Iterator interface {
|
|||
// from the bottom up.
|
||||
NextResult() bool
|
||||
|
||||
// Check(), given a value, returns whether or not that value is within the set
|
||||
// held by this iterator.
|
||||
Check(Value) bool
|
||||
// Contains returns whether the value is within the set held by the iterator.
|
||||
Contains(Value) bool
|
||||
|
||||
// Start iteration from the beginning
|
||||
Reset()
|
||||
|
|
@ -161,9 +160,9 @@ type FixedIterator interface {
|
|||
}
|
||||
|
||||
type IteratorStats struct {
|
||||
CheckCost int64
|
||||
NextCost int64
|
||||
Size int64
|
||||
ContainsCost int64
|
||||
NextCost int64
|
||||
Size int64
|
||||
}
|
||||
|
||||
// Type enumerates the set of Iterator types.
|
||||
|
|
@ -229,20 +228,20 @@ func (t Type) String() string {
|
|||
return types[t]
|
||||
}
|
||||
|
||||
// Utility logging functions for when an iterator gets called Next upon, or Check upon, as
|
||||
// Utility logging functions for when an iterator gets called Next upon, or Contains upon, as
|
||||
// well as what they return. Highly useful for tracing the execution path of a query.
|
||||
func CheckLogIn(it Iterator, val Value) {
|
||||
func ContainsLogIn(it Iterator, val Value) {
|
||||
if glog.V(4) {
|
||||
glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
glog.V(4).Infof("%s %d CHECK CONTAINS %d", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
}
|
||||
}
|
||||
|
||||
func CheckLogOut(it Iterator, val Value, good bool) bool {
|
||||
func ContainsLogOut(it Iterator, val Value, good bool) bool {
|
||||
if glog.V(4) {
|
||||
if good {
|
||||
glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
glog.V(4).Infof("%s %d CHECK CONTAINS %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
} else {
|
||||
glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
glog.V(4).Infof("%s %d CHECK CONTAINS %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val)
|
||||
}
|
||||
}
|
||||
return good
|
||||
|
|
|
|||
|
|
@ -126,16 +126,16 @@ func (it *Int64) Size() (int64, bool) {
|
|||
return Size, true
|
||||
}
|
||||
|
||||
// Check() for an Int64 is merely seeing if the passed value is
|
||||
// Contains() for an Int64 is merely seeing if the passed value is
|
||||
// within the range, assuming the value is an int64.
|
||||
func (it *Int64) Check(tsv graph.Value) bool {
|
||||
graph.CheckLogIn(it, tsv)
|
||||
func (it *Int64) Contains(tsv graph.Value) bool {
|
||||
graph.ContainsLogIn(it, tsv)
|
||||
v := tsv.(int64)
|
||||
if it.min <= v && v <= it.max {
|
||||
it.result = v
|
||||
return graph.CheckLogOut(it, v, true)
|
||||
return graph.ContainsLogOut(it, v, true)
|
||||
}
|
||||
return graph.CheckLogOut(it, v, false)
|
||||
return graph.ContainsLogOut(it, v, false)
|
||||
}
|
||||
|
||||
// The type of this iterator is an "all". This is important, as it puts it in
|
||||
|
|
@ -150,8 +150,8 @@ func (it *Int64) Optimize() (graph.Iterator, bool) { return it, false }
|
|||
func (it *Int64) Stats() graph.IteratorStats {
|
||||
s, _ := it.Size()
|
||||
return graph.IteratorStats{
|
||||
CheckCost: 1,
|
||||
NextCost: 1,
|
||||
Size: s,
|
||||
ContainsCost: 1,
|
||||
NextCost: 1,
|
||||
Size: s,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,11 +6,11 @@
|
|||
//
|
||||
// It accomplishes this in one of two ways. If it is a Next()ed iterator (that
|
||||
// is, it is a top level iterator, or on the "Next() path", then it will Next()
|
||||
// it's primary iterator (helpfully, and.primary_it) and Check() the resultant
|
||||
// its primary iterator (helpfully, and.primary_it) and Contains() the resultant
|
||||
// value against its other iterators. If it matches all of them, then it
|
||||
// returns that value. Otherwise, it repeats the process.
|
||||
//
|
||||
// If it's on a Check() path, it merely Check()s every iterator, and returns the
|
||||
// If it's on a Contains() path, it merely calls Contains() on every iterator, and returns the
|
||||
// logical AND of each result.
|
||||
|
||||
package iterator
|
||||
|
|
@ -86,7 +86,7 @@ func (it *And) Clone() graph.Iterator {
|
|||
and.AddSubIterator(sub.Clone())
|
||||
}
|
||||
if it.checkList != nil {
|
||||
and.optimizeCheck()
|
||||
and.optimizeContains()
|
||||
}
|
||||
return and
|
||||
}
|
||||
|
|
@ -164,7 +164,7 @@ func (it *And) Next() (graph.Value, bool) {
|
|||
if !exists {
|
||||
return graph.NextLogOut(it, nil, false)
|
||||
}
|
||||
if it.checkSubIts(curr) {
|
||||
if it.subItsContain(curr) {
|
||||
it.result = curr
|
||||
return graph.NextLogOut(it, curr, true)
|
||||
}
|
||||
|
|
@ -177,10 +177,10 @@ func (it *And) Result() graph.Value {
|
|||
}
|
||||
|
||||
// Checks a value against the non-primary iterators, in order.
|
||||
func (it *And) checkSubIts(val graph.Value) bool {
|
||||
func (it *And) subItsContain(val graph.Value) bool {
|
||||
var subIsGood = true
|
||||
for _, sub := range it.internalIterators {
|
||||
subIsGood = sub.Check(val)
|
||||
subIsGood = sub.Contains(val)
|
||||
if !subIsGood {
|
||||
break
|
||||
}
|
||||
|
|
@ -188,10 +188,10 @@ func (it *And) checkSubIts(val graph.Value) bool {
|
|||
return subIsGood
|
||||
}
|
||||
|
||||
func (it *And) checkCheckList(val graph.Value) bool {
|
||||
func (it *And) checkContainsList(val graph.Value) bool {
|
||||
ok := true
|
||||
for _, c := range it.checkList {
|
||||
ok = c.Check(val)
|
||||
ok = c.Contains(val)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
|
@ -199,25 +199,25 @@ func (it *And) checkCheckList(val graph.Value) bool {
|
|||
if ok {
|
||||
it.result = val
|
||||
}
|
||||
return graph.CheckLogOut(it, val, ok)
|
||||
return graph.ContainsLogOut(it, val, ok)
|
||||
}
|
||||
|
||||
// Check a value against the entire iterator, in order.
|
||||
func (it *And) Check(val graph.Value) bool {
|
||||
graph.CheckLogIn(it, val)
|
||||
func (it *And) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
if it.checkList != nil {
|
||||
return it.checkCheckList(val)
|
||||
return it.checkContainsList(val)
|
||||
}
|
||||
mainGood := it.primaryIt.Check(val)
|
||||
mainGood := it.primaryIt.Contains(val)
|
||||
if !mainGood {
|
||||
return graph.CheckLogOut(it, val, false)
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
othersGood := it.checkSubIts(val)
|
||||
othersGood := it.subItsContain(val)
|
||||
if !othersGood {
|
||||
return graph.CheckLogOut(it, val, false)
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
it.result = val
|
||||
return graph.CheckLogOut(it, val, true)
|
||||
return graph.ContainsLogOut(it, val, true)
|
||||
}
|
||||
|
||||
// Returns the approximate size of the And iterator. Because we're dealing
|
||||
|
|
|
|||
|
|
@ -38,10 +38,10 @@ import (
|
|||
// In short, tread lightly.
|
||||
|
||||
// Optimizes the And, by picking the most efficient way to Next() and
|
||||
// Check() its subiterators. For SQL fans, this is equivalent to JOIN.
|
||||
// Contains() its subiterators. For SQL fans, this is equivalent to JOIN.
|
||||
func (it *And) Optimize() (graph.Iterator, bool) {
|
||||
// First, let's get the slice of iterators, in order (first one is Next()ed,
|
||||
// the rest are Check()ed)
|
||||
// the rest are Contains()ed)
|
||||
old := it.SubIterators()
|
||||
|
||||
// And call Optimize() on our subtree, replacing each one in the order we
|
||||
|
|
@ -84,7 +84,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
|
|||
// Move the tags hanging on us (like any good replacement).
|
||||
newAnd.tags.CopyFrom(it)
|
||||
|
||||
newAnd.optimizeCheck()
|
||||
newAnd.optimizeContains()
|
||||
|
||||
// And close ourselves but not our subiterators -- some may still be alive in
|
||||
// the new And (they were unchanged upon calling Optimize() on them, at the
|
||||
|
|
@ -142,7 +142,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
|||
|
||||
// Find the iterator with the projected "best" total cost.
|
||||
// Total cost is defined as The Next()ed iterator's cost to Next() out
|
||||
// all of it's contents, and to Check() each of those against everyone
|
||||
// all of its contents, and to Contains() each of those against everyone
|
||||
// else.
|
||||
for _, it := range its {
|
||||
if _, canNext := it.(graph.Nexter); !canNext {
|
||||
|
|
@ -159,7 +159,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
|||
continue
|
||||
}
|
||||
stats := f.Stats()
|
||||
cost += stats.CheckCost
|
||||
cost += stats.ContainsCost
|
||||
}
|
||||
cost *= rootStats.Size
|
||||
if cost < bestCost {
|
||||
|
|
@ -169,7 +169,7 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
|||
}
|
||||
|
||||
// TODO(barakmich): Optimization of order need not stop here. Picking a smart
|
||||
// Check() order based on probability of getting a false Check() first is
|
||||
// Contains() order based on probability of getting a false Contains() first is
|
||||
// useful (fail faster).
|
||||
|
||||
// Put the best iterator (the one we wish to Next()) at the front...
|
||||
|
|
@ -192,12 +192,12 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
|||
type byCost []graph.Iterator
|
||||
|
||||
func (c byCost) Len() int { return len(c) }
|
||||
func (c byCost) Less(i, j int) bool { return c[i].Stats().CheckCost < c[j].Stats().CheckCost }
|
||||
func (c byCost) Less(i, j int) bool { return c[i].Stats().ContainsCost < c[j].Stats().ContainsCost }
|
||||
func (c byCost) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
|
||||
|
||||
// optimizeCheck(l) creates an alternate check list, containing the same contents
|
||||
// optimizeContains() creates an alternate check list, containing the same contents
|
||||
// but with a new ordering, however it wishes.
|
||||
func (it *And) optimizeCheck() {
|
||||
func (it *And) optimizeContains() {
|
||||
// GetSubIterators allocates, so this is currently safe.
|
||||
// TODO(kortschak) Reuse it.checkList if possible.
|
||||
// This involves providing GetSubIterators with a slice to fill.
|
||||
|
|
@ -298,21 +298,21 @@ func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
|
|||
// For now, however, it's pretty static.
|
||||
func (it *And) Stats() graph.IteratorStats {
|
||||
primaryStats := it.primaryIt.Stats()
|
||||
CheckCost := primaryStats.CheckCost
|
||||
ContainsCost := primaryStats.ContainsCost
|
||||
NextCost := primaryStats.NextCost
|
||||
Size := primaryStats.Size
|
||||
for _, sub := range it.internalIterators {
|
||||
stats := sub.Stats()
|
||||
NextCost += stats.CheckCost
|
||||
CheckCost += stats.CheckCost
|
||||
NextCost += stats.ContainsCost
|
||||
ContainsCost += stats.ContainsCost
|
||||
if Size > stats.Size {
|
||||
Size = stats.Size
|
||||
}
|
||||
}
|
||||
return graph.IteratorStats{
|
||||
CheckCost: CheckCost,
|
||||
NextCost: NextCost,
|
||||
Size: Size,
|
||||
ContainsCost: ContainsCost,
|
||||
NextCost: NextCost,
|
||||
Size: Size,
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,18 +121,18 @@ func (it *Fixed) DebugString(indent int) string {
|
|||
func (it *Fixed) Type() graph.Type { return graph.Fixed }
|
||||
|
||||
// Check if the passed value is equal to one of the values stored in the iterator.
|
||||
func (it *Fixed) Check(v graph.Value) bool {
|
||||
func (it *Fixed) Contains(v graph.Value) bool {
|
||||
// Could be optimized by keeping it sorted or using a better datastructure.
|
||||
// However, for fixed iterators, which are by definition kind of tiny, this
|
||||
// isn't a big issue.
|
||||
graph.CheckLogIn(it, v)
|
||||
graph.ContainsLogIn(it, v)
|
||||
for _, x := range it.values {
|
||||
if it.cmp(x, v) {
|
||||
it.result = x
|
||||
return graph.CheckLogOut(it, v, true)
|
||||
return graph.ContainsLogOut(it, v, true)
|
||||
}
|
||||
}
|
||||
return graph.CheckLogOut(it, v, false)
|
||||
return graph.ContainsLogOut(it, v, false)
|
||||
}
|
||||
|
||||
// Return the next stored value from the iterator.
|
||||
|
|
@ -181,12 +181,12 @@ func (it *Fixed) Size() (int64, bool) {
|
|||
return int64(len(it.values)), true
|
||||
}
|
||||
|
||||
// As we right now have to scan the entire list, Next and Check are linear with the
|
||||
// As we right now have to scan the entire list, Next and Contains are linear with the
|
||||
// size. However, a better data structure could remove these limits.
|
||||
func (it *Fixed) Stats() graph.IteratorStats {
|
||||
return graph.IteratorStats{
|
||||
CheckCost: int64(len(it.values)),
|
||||
NextCost: int64(len(it.values)),
|
||||
Size: int64(len(it.values)),
|
||||
ContainsCost: int64(len(it.values)),
|
||||
NextCost: int64(len(it.values)),
|
||||
Size: int64(len(it.values)),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,10 +23,10 @@ package iterator
|
|||
// path. That's okay -- in reality, it can be viewed as returning the value for
|
||||
// a new triple, but to make logic much simpler, here we have the HasA.
|
||||
//
|
||||
// Likewise, it's important to think about Check()ing a HasA. When given a
|
||||
// Likewise, it's important to think about Contains()ing a HasA. When given a
|
||||
// value to check, it means "Check all predicates that have this value for your
|
||||
// direction against the subiterator." This would imply that there's more than
|
||||
// one possibility for the same Check()ed value. While we could return the
|
||||
// one possibility for the same Contains()ed value. While we could return the
|
||||
// number of options, it's simpler to return one, and then call NextResult()
|
||||
// enough times to enumerate the options. (In fact, one could argue that the
|
||||
// raison d'etre for NextResult() is this iterator).
|
||||
|
|
@ -45,7 +45,7 @@ import (
|
|||
|
||||
// A HasA consists of a reference back to the graph.TripleStore that it references,
|
||||
// a primary subiterator, a direction in which the triples for that subiterator point,
|
||||
// and a temporary holder for the iterator generated on Check().
|
||||
// and a temporary holder for the iterator generated on Contains().
|
||||
type HasA struct {
|
||||
uid uint64
|
||||
tags graph.Tagger
|
||||
|
|
@ -140,9 +140,9 @@ func (it *HasA) DebugString(indent int) string {
|
|||
|
||||
// Check a value against our internal iterator. In order to do this, we must first open a new
|
||||
// iterator of "triples that have `val` in our direction", given to us by the triple store,
|
||||
// and then Next() values out of that iterator and Check() them against our subiterator.
|
||||
func (it *HasA) Check(val graph.Value) bool {
|
||||
graph.CheckLogIn(it, val)
|
||||
// and then Next() values out of that iterator and Contains() them against our subiterator.
|
||||
func (it *HasA) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
if glog.V(4) {
|
||||
glog.V(4).Infoln("Id is", it.ts.NameOf(val))
|
||||
}
|
||||
|
|
@ -151,13 +151,13 @@ func (it *HasA) Check(val graph.Value) bool {
|
|||
it.resultIt.Close()
|
||||
}
|
||||
it.resultIt = it.ts.TripleIterator(it.dir, val)
|
||||
return graph.CheckLogOut(it, val, it.GetCheckResult())
|
||||
return graph.ContainsLogOut(it, val, it.NextContains())
|
||||
}
|
||||
|
||||
// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the
|
||||
// NextContains() is shared code between Contains() and NextResult() -- calls next on the
|
||||
// result iterator (a triple iterator based on the last checked value) and returns true if
|
||||
// another match is made.
|
||||
func (it *HasA) GetCheckResult() bool {
|
||||
func (it *HasA) NextContains() bool {
|
||||
for {
|
||||
linkVal, ok := graph.Next(it.resultIt)
|
||||
if !ok {
|
||||
|
|
@ -166,7 +166,7 @@ func (it *HasA) GetCheckResult() bool {
|
|||
if glog.V(4) {
|
||||
glog.V(4).Infoln("Quad is", it.ts.Quad(linkVal))
|
||||
}
|
||||
if it.primaryIt.Check(linkVal) {
|
||||
if it.primaryIt.Contains(linkVal) {
|
||||
it.result = it.ts.TripleDirection(linkVal, it.dir)
|
||||
return true
|
||||
}
|
||||
|
|
@ -178,17 +178,17 @@ func (it *HasA) GetCheckResult() bool {
|
|||
func (it *HasA) NextResult() bool {
|
||||
// Order here is important. If the subiterator has a NextResult, then we
|
||||
// need do nothing -- there is a next result, and we shouldn't move forward.
|
||||
// However, we then need to get the next result from our last Check().
|
||||
// However, we then need to get the next result from our last Contains().
|
||||
//
|
||||
// The upshot is, the end of NextResult() bubbles up from the bottom of the
|
||||
// iterator tree up, and we need to respect that.
|
||||
if it.primaryIt.NextResult() {
|
||||
return true
|
||||
}
|
||||
return it.GetCheckResult()
|
||||
return it.NextContains()
|
||||
}
|
||||
|
||||
// Get the next result from this iterator. This is simpler than Check. We have a
|
||||
// Get the next result from this iterator. This is simpler than Contains. We have a
|
||||
// subiterator we can get a value from, and we can take that resultant triple,
|
||||
// pull our direction out of it, and return that.
|
||||
func (it *HasA) Next() (graph.Value, bool) {
|
||||
|
|
@ -214,7 +214,7 @@ func (it *HasA) Result() graph.Value {
|
|||
|
||||
// Stats() returns the statistics on the HasA iterator. This is curious. Next
|
||||
// cost is easy, it's an extra call or so on top of the subiterator Next cost.
|
||||
// CheckCost involves going to the graph.TripleStore, iterating out values, and hoping
|
||||
// ContainsCost involves going to the graph.TripleStore, iterating out values, and hoping
|
||||
// one sticks -- potentially expensive, depending on fanout. Size, however, is
|
||||
// potentially smaller. We know at worst it's the size of the subiterator, but
|
||||
// if there are many repeated values, it could be much smaller in totality.
|
||||
|
|
@ -227,9 +227,9 @@ func (it *HasA) Stats() graph.IteratorStats {
|
|||
nextConstant := int64(2)
|
||||
tripleConstant := int64(1)
|
||||
return graph.IteratorStats{
|
||||
NextCost: tripleConstant + subitStats.NextCost,
|
||||
CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost,
|
||||
Size: faninFactor * subitStats.Size,
|
||||
NextCost: tripleConstant + subitStats.NextCost,
|
||||
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
|
||||
Size: faninFactor * subitStats.Size,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ func (it *Null) TagResults(dst map[string]graph.Value) {
|
|||
}
|
||||
}
|
||||
|
||||
func (it *Null) Check(graph.Value) bool {
|
||||
func (it *Null) Contains(graph.Value) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ package iterator
|
|||
// LinksTo is therefore sensitive to growing with a fanout. (A small-sized
|
||||
// subiterator could cause LinksTo to be large).
|
||||
//
|
||||
// Check()ing a LinksTo means, given a link, take the direction we care about
|
||||
// Contains()ing a LinksTo means, given a link, take the direction we care about
|
||||
// and check if it's in our subiterator. Checking is therefore fairly cheap, and
|
||||
// similar to checking the subiterator alone.
|
||||
//
|
||||
|
|
@ -116,14 +116,14 @@ func (it *LinksTo) DebugString(indent int) string {
|
|||
|
||||
// If it checks in the right direction for the subiterator, it is a valid link
|
||||
// for the LinksTo.
|
||||
func (it *LinksTo) Check(val graph.Value) bool {
|
||||
graph.CheckLogIn(it, val)
|
||||
func (it *LinksTo) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
node := it.ts.TripleDirection(val, it.dir)
|
||||
if it.primaryIt.Check(node) {
|
||||
if it.primaryIt.Contains(node) {
|
||||
it.result = val
|
||||
return graph.CheckLogOut(it, val, true)
|
||||
return graph.ContainsLogOut(it, val, true)
|
||||
}
|
||||
return graph.CheckLogOut(it, val, false)
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
|
||||
// Return a list containing only our subiterator.
|
||||
|
|
@ -198,9 +198,9 @@ func (it *LinksTo) Stats() graph.IteratorStats {
|
|||
checkConstant := int64(1)
|
||||
nextConstant := int64(2)
|
||||
return graph.IteratorStats{
|
||||
NextCost: nextConstant + subitStats.NextCost,
|
||||
CheckCost: checkConstant + subitStats.CheckCost,
|
||||
Size: fanoutFactor * subitStats.Size,
|
||||
NextCost: nextConstant + subitStats.NextCost,
|
||||
ContainsCost: checkConstant + subitStats.ContainsCost,
|
||||
Size: fanoutFactor * subitStats.Size,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -100,11 +100,11 @@ func (it *Optional) SubIterators() []graph.Iterator {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Check() is the real hack of this iterator. It always returns true, regardless
|
||||
// Contains() is the real hack of this iterator. It always returns true, regardless
|
||||
// of whether the subiterator matched. But we keep track of whether the subiterator
|
||||
// matched for results purposes.
|
||||
func (it *Optional) Check(val graph.Value) bool {
|
||||
checked := it.subIt.Check(val)
|
||||
func (it *Optional) Contains(val graph.Value) bool {
|
||||
checked := it.subIt.Contains(val)
|
||||
it.lastCheck = checked
|
||||
it.result = val
|
||||
return true
|
||||
|
|
@ -146,9 +146,9 @@ func (it *Optional) Optimize() (graph.Iterator, bool) {
|
|||
func (it *Optional) Stats() graph.IteratorStats {
|
||||
subStats := it.subIt.Stats()
|
||||
return graph.IteratorStats{
|
||||
CheckCost: subStats.CheckCost,
|
||||
NextCost: int64(1 << 62),
|
||||
Size: subStats.Size,
|
||||
ContainsCost: subStats.ContainsCost,
|
||||
NextCost: int64(1 << 62),
|
||||
Size: subStats.Size,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -177,10 +177,10 @@ func (it *Or) Result() graph.Value {
|
|||
}
|
||||
|
||||
// Checks a value against the iterators, in order.
|
||||
func (it *Or) checkSubIts(val graph.Value) bool {
|
||||
func (it *Or) subItsContain(val graph.Value) bool {
|
||||
var subIsGood = false
|
||||
for i, sub := range it.internalIterators {
|
||||
subIsGood = sub.Check(val)
|
||||
subIsGood = sub.Contains(val)
|
||||
if subIsGood {
|
||||
it.currentIterator = i
|
||||
break
|
||||
|
|
@ -190,14 +190,14 @@ func (it *Or) checkSubIts(val graph.Value) bool {
|
|||
}
|
||||
|
||||
// Check a value against the entire graph.iterator, in order.
|
||||
func (it *Or) Check(val graph.Value) bool {
|
||||
graph.CheckLogIn(it, val)
|
||||
anyGood := it.checkSubIts(val)
|
||||
func (it *Or) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
anyGood := it.subItsContain(val)
|
||||
if !anyGood {
|
||||
return graph.CheckLogOut(it, val, false)
|
||||
return graph.ContainsLogOut(it, val, false)
|
||||
}
|
||||
it.result = val
|
||||
return graph.CheckLogOut(it, val, true)
|
||||
return graph.ContainsLogOut(it, val, true)
|
||||
}
|
||||
|
||||
// Returns the approximate size of the Or graph.iterator. Because we're dealing
|
||||
|
|
@ -277,13 +277,13 @@ func (it *Or) Optimize() (graph.Iterator, bool) {
|
|||
}
|
||||
|
||||
func (it *Or) Stats() graph.IteratorStats {
|
||||
CheckCost := int64(0)
|
||||
ContainsCost := int64(0)
|
||||
NextCost := int64(0)
|
||||
Size := int64(0)
|
||||
for _, sub := range it.internalIterators {
|
||||
stats := sub.Stats()
|
||||
NextCost += stats.NextCost
|
||||
CheckCost += stats.CheckCost
|
||||
ContainsCost += stats.ContainsCost
|
||||
if it.isShortCircuiting {
|
||||
if Size < stats.Size {
|
||||
Size = stats.Size
|
||||
|
|
@ -293,9 +293,9 @@ func (it *Or) Stats() graph.IteratorStats {
|
|||
}
|
||||
}
|
||||
return graph.IteratorStats{
|
||||
CheckCost: CheckCost,
|
||||
NextCost: NextCost,
|
||||
Size: Size,
|
||||
ContainsCost: ContainsCost,
|
||||
NextCost: NextCost,
|
||||
Size: Size,
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,13 +66,13 @@ func TestOrIteratorBasics(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, v := range []int{2, 3, 21} {
|
||||
if !or.Check(v) {
|
||||
if !or.Contains(v) {
|
||||
t.Errorf("Failed to correctly check %d as true", v)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range []int{22, 5, 0} {
|
||||
if or.Check(v) {
|
||||
if or.Contains(v) {
|
||||
t.Errorf("Failed to correctly check %d as false", v)
|
||||
}
|
||||
}
|
||||
|
|
@ -125,12 +125,12 @@ func TestShortCircuitingOrBasics(t *testing.T) {
|
|||
or.AddSubIterator(f1)
|
||||
or.AddSubIterator(f2)
|
||||
for _, v := range []int{2, 3, 21} {
|
||||
if !or.Check(v) {
|
||||
if !or.Contains(v) {
|
||||
t.Errorf("Failed to correctly check %d as true", v)
|
||||
}
|
||||
}
|
||||
for _, v := range []int{22, 5, 0} {
|
||||
if or.Check(v) {
|
||||
if or.Contains(v) {
|
||||
t.Errorf("Failed to correctly check %d as false", v)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,11 +171,11 @@ func (it *Comparison) SubIterators() []graph.Iterator {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (it *Comparison) Check(val graph.Value) bool {
|
||||
func (it *Comparison) Contains(val graph.Value) bool {
|
||||
if !it.doComparison(val) {
|
||||
return false
|
||||
}
|
||||
return it.subIt.Check(val)
|
||||
return it.subIt.Contains(val)
|
||||
}
|
||||
|
||||
// If we failed the check, then the subiterator should not contribute to the result
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ func TestValueComparison(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
var vciCheckTests = []struct {
|
||||
var vciContainsTests = []struct {
|
||||
message string
|
||||
operator Operator
|
||||
check graph.Value
|
||||
|
|
@ -114,10 +114,10 @@ var vciCheckTests = []struct {
|
|||
},
|
||||
}
|
||||
|
||||
func TestVCICheck(t *testing.T) {
|
||||
for _, test := range vciCheckTests {
|
||||
func TestVCIContains(t *testing.T) {
|
||||
for _, test := range vciContainsTests {
|
||||
vc := NewComparison(simpleFixedIterator(), test.operator, int64(2), simpleStore)
|
||||
if vc.Check(test.check) != test.expect {
|
||||
if vc.Contains(test.check) != test.expect {
|
||||
t.Errorf("Failed to show %s", test.message)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ func (it *AllIterator) SubIterators() []graph.Iterator {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (it *AllIterator) Check(v graph.Value) bool {
|
||||
func (it *AllIterator) Contains(v graph.Value) bool {
|
||||
it.result = v
|
||||
return true
|
||||
}
|
||||
|
|
@ -174,8 +174,8 @@ func (it *AllIterator) Optimize() (graph.Iterator, bool) {
|
|||
func (it *AllIterator) Stats() graph.IteratorStats {
|
||||
s, _ := it.Size()
|
||||
return graph.IteratorStats{
|
||||
CheckCost: 1,
|
||||
NextCost: 2,
|
||||
Size: s,
|
||||
ContainsCost: 1,
|
||||
NextCost: 2,
|
||||
Size: s,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
|
|||
panic("unreachable")
|
||||
}
|
||||
|
||||
func (it *Iterator) Check(v graph.Value) bool {
|
||||
func (it *Iterator) Contains(v graph.Value) bool {
|
||||
val := v.([]byte)
|
||||
if val[0] == 'z' {
|
||||
return false
|
||||
|
|
@ -262,8 +262,8 @@ func (it *Iterator) Optimize() (graph.Iterator, bool) {
|
|||
func (it *Iterator) Stats() graph.IteratorStats {
|
||||
s, _ := it.Size()
|
||||
return graph.IteratorStats{
|
||||
CheckCost: 1,
|
||||
NextCost: 2,
|
||||
Size: s,
|
||||
ContainsCost: 1,
|
||||
NextCost: 2,
|
||||
Size: s,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -251,14 +251,14 @@ func TestIterator(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, pq := range expect {
|
||||
if !it.Check(qs.ValueOf(pq)) {
|
||||
if !it.Contains(qs.ValueOf(pq)) {
|
||||
t.Errorf("Failed to find and check %q correctly", pq)
|
||||
}
|
||||
}
|
||||
// FIXME(kortschak) Why does this fail?
|
||||
/*
|
||||
for _, pq := range []string{"baller"} {
|
||||
if it.Check(qs.ValueOf(pq)) {
|
||||
if it.Contains(qs.ValueOf(pq)) {
|
||||
t.Errorf("Failed to check %q correctly", pq)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,13 +124,13 @@ func (it *Iterator) Size() (int64, bool) {
|
|||
return int64(it.tree.Len()), true
|
||||
}
|
||||
|
||||
func (it *Iterator) Check(v graph.Value) bool {
|
||||
graph.CheckLogIn(it, v)
|
||||
func (it *Iterator) Contains(v graph.Value) bool {
|
||||
graph.ContainsLogIn(it, v)
|
||||
if it.tree.Has(Int64(v.(int64))) {
|
||||
it.result = v
|
||||
return graph.CheckLogOut(it, v, true)
|
||||
return graph.ContainsLogOut(it, v, true)
|
||||
}
|
||||
return graph.CheckLogOut(it, v, false)
|
||||
return graph.ContainsLogOut(it, v, false)
|
||||
}
|
||||
|
||||
func (it *Iterator) DebugString(indent int) string {
|
||||
|
|
@ -156,8 +156,8 @@ func (it *Iterator) Optimize() (graph.Iterator, bool) {
|
|||
|
||||
func (it *Iterator) Stats() graph.IteratorStats {
|
||||
return graph.IteratorStats{
|
||||
CheckCost: int64(math.Log(float64(it.tree.Len()))) + 1,
|
||||
NextCost: 1,
|
||||
Size: int64(it.tree.Len()),
|
||||
ContainsCost: int64(math.Log(float64(it.tree.Len()))) + 1,
|
||||
NextCost: 1,
|
||||
Size: int64(it.tree.Len()),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -174,11 +174,11 @@ func (it *Iterator) SubIterators() []graph.Iterator {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (it *Iterator) Check(v graph.Value) bool {
|
||||
graph.CheckLogIn(it, v)
|
||||
func (it *Iterator) Contains(v graph.Value) bool {
|
||||
graph.ContainsLogIn(it, v)
|
||||
if it.isAll {
|
||||
it.result = v
|
||||
return graph.CheckLogOut(it, v, true)
|
||||
return graph.ContainsLogOut(it, v, true)
|
||||
}
|
||||
var offset int
|
||||
switch it.dir {
|
||||
|
|
@ -194,9 +194,9 @@ func (it *Iterator) Check(v graph.Value) bool {
|
|||
val := v.(string)[offset : it.qs.hasher.Size()*2+offset]
|
||||
if val == it.hash {
|
||||
it.result = v
|
||||
return graph.CheckLogOut(it, v, true)
|
||||
return graph.ContainsLogOut(it, v, true)
|
||||
}
|
||||
return graph.CheckLogOut(it, v, false)
|
||||
return graph.ContainsLogOut(it, v, false)
|
||||
}
|
||||
|
||||
func (it *Iterator) Size() (int64, bool) {
|
||||
|
|
@ -229,8 +229,8 @@ func (it *Iterator) DebugString(indent int) string {
|
|||
func (it *Iterator) Stats() graph.IteratorStats {
|
||||
size, _ := it.Size()
|
||||
return graph.IteratorStats{
|
||||
CheckCost: 1,
|
||||
NextCost: 5,
|
||||
Size: size,
|
||||
ContainsCost: 1,
|
||||
NextCost: 5,
|
||||
Size: size,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue