330 lines
10 KiB
Go
330 lines
10 KiB
Go
// Copyright 2014 The Cayley Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package graph
|
|
|
|
// Perhaps the most tricky file in this entire module. Really a method on the
|
|
// AndIterator, but important enough to deserve its own file.
|
|
//
|
|
// Calling Optimize() on an And iterator, like any iterator, requires that we
|
|
// preserve the underlying meaning. However, the And has many choices, namely,
|
|
// which one of its subiterators will be the branch that does the Next()ing,
|
|
// and which ordering of the remaining iterators is the most efficient. In
|
|
// short, this is where a lot of the query optimization happens, and there are
|
|
// many wins to be had here, as well as many bad bugs. The worst class of bug
|
|
// changes the meaning of the query. The second worst class makes things really
|
|
// slow.
|
|
//
|
|
// The good news is this: If Optimize() is never called (turned off, perhaps) we can
|
|
// be sure the results are as good as the query language called for.
|
|
//
|
|
// In short, tread lightly.
|
|
|
|
import (
|
|
"container/list"
|
|
)
|
|
|
|
// Optimizes the AndIterator, by picking the most efficient way to Next() and
|
|
// Check() its subiterators. For SQL fans, this is equivalent to JOIN.
|
|
func (and *AndIterator) Optimize() (Iterator, bool) {
|
|
// First, let's get the list of iterators, in order (first one is Next()ed,
|
|
// the rest are Check()ed)
|
|
oldItList := and.GetSubIterators()
|
|
|
|
// And call Optimize() on our subtree, replacing each one in the order we
|
|
// found them. it_list is the newly optimized versions of these, and changed
|
|
// is another list, of only the ones that have returned replacements and
|
|
// changed.
|
|
itList := optimizeSubIterators(oldItList)
|
|
|
|
// Close the replaced iterators (they ought to close themselves, but Close()
|
|
// is idempotent, so this just protects against any machinations).
|
|
closeIteratorList(oldItList, nil)
|
|
|
|
// If we can find only one subiterator which is equivalent to this whole and,
|
|
// we can replace the And...
|
|
out := and.optimizeReplacement(itList)
|
|
if out != nil {
|
|
// ...Move the tags to the replacement...
|
|
moveTagsTo(out, and)
|
|
// ...Close everyone except `out`, our replacement...
|
|
closeIteratorList(itList, out)
|
|
// ...And return it.
|
|
return out, true
|
|
}
|
|
|
|
// And now, without changing any of the iterators, we reorder them. it_list is
|
|
// now a permutation of itself, but the contents are unchanged.
|
|
itList = optimizeOrder(itList)
|
|
|
|
// Okay! At this point we have an optimized order.
|
|
|
|
// The easiest thing to do at this point is merely to create a new And iterator
|
|
// and replace ourselves with our (reordered, optimized) clone.
|
|
newAnd := NewAndIterator()
|
|
|
|
// Add the subiterators in order.
|
|
for e := itList.Front(); e != nil; e = e.Next() {
|
|
newAnd.AddSubIterator(e.Value.(Iterator))
|
|
}
|
|
|
|
// Move the tags hanging on us (like any good replacement).
|
|
newAnd.CopyTagsFrom(and)
|
|
|
|
newAnd.optimizeCheck()
|
|
|
|
// And close ourselves but not our subiterators -- some may still be alive in
|
|
// the new And (they were unchanged upon calling Optimize() on them, at the
|
|
// start).
|
|
and.cleanUp()
|
|
return newAnd, true
|
|
}
|
|
|
|
// Closes a list of iterators, except the one passed in `except`. Closes all
|
|
// of the iterators in the list if `except` is nil.
|
|
func closeIteratorList(l *list.List, except Iterator) {
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
if it != except {
|
|
e.Value.(Iterator).Close()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find if there is a single subiterator which is a valid replacement for this
|
|
// AndIterator.
|
|
func (and *AndIterator) optimizeReplacement(itList *list.List) Iterator {
|
|
// If we were created with no SubIterators, we're as good as Null.
|
|
if itList.Len() == 0 {
|
|
return &NullIterator{}
|
|
}
|
|
if itList.Len() == 1 {
|
|
// When there's only one iterator, there's only one choice.
|
|
return itList.Front().Value.(Iterator)
|
|
}
|
|
// If any of our subiterators, post-optimization, are also Null, then
|
|
// there's no point in continuing the branch, we will have no results
|
|
// and we are null as well.
|
|
if hasAnyNullIterators(itList) {
|
|
return &NullIterator{}
|
|
}
|
|
|
|
// If we have one useful iterator, use that.
|
|
it := hasOneUsefulIterator(itList)
|
|
if it != nil {
|
|
return it
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// optimizeOrder(l) takes a list and returns a list, containing the same contents
|
|
// but with a new ordering, however it wishes.
|
|
func optimizeOrder(l *list.List) *list.List {
|
|
out := list.New()
|
|
var bestIt Iterator
|
|
bestCost := int64(1 << 62)
|
|
// bad contains iterators that can't be (efficiently) nexted, such as
|
|
// "optional" or "not". Separate them out and tack them on at the end.
|
|
bad := list.New()
|
|
|
|
// Find the iterator with the projected "best" total cost.
|
|
// Total cost is defined as The Next()ed iterator's cost to Next() out
|
|
// all of it's contents, and to Check() each of those against everyone
|
|
// else.
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
if !it.Nextable() {
|
|
bad.PushBack(it)
|
|
continue
|
|
}
|
|
rootStats := e.Value.(Iterator).GetStats()
|
|
projectedCost := rootStats.NextCost
|
|
for f := l.Front(); f != nil; f = f.Next() {
|
|
if !f.Value.(Iterator).Nextable() {
|
|
continue
|
|
}
|
|
if f == e {
|
|
continue
|
|
}
|
|
stats := f.Value.(Iterator).GetStats()
|
|
projectedCost += stats.CheckCost
|
|
}
|
|
projectedCost = projectedCost * rootStats.Size
|
|
if projectedCost < bestCost {
|
|
bestIt = it
|
|
bestCost = projectedCost
|
|
}
|
|
}
|
|
|
|
// TODO(barakmich): Optimization of order need not stop here. Picking a smart
|
|
// Check() order based on probability of getting a false Check() first is
|
|
// useful (fail faster).
|
|
|
|
// Put the best iterator (the one we wish to Next()) at the front...
|
|
out.PushBack(bestIt)
|
|
// ...And push everyone else after...
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
thisIt := e.Value.(Iterator)
|
|
if !thisIt.Nextable() {
|
|
continue
|
|
}
|
|
if thisIt != bestIt {
|
|
out.PushBack(thisIt)
|
|
}
|
|
}
|
|
// ...And finally, the difficult children on the end.
|
|
out.PushBackList(bad)
|
|
return out
|
|
}
|
|
|
|
// optimizeCheck(l) creates an alternate check list, containing the same contents
|
|
// but with a new ordering, however it wishes.
|
|
func (and *AndIterator) optimizeCheck() {
|
|
subIts := and.GetSubIterators()
|
|
out := list.New()
|
|
|
|
// Find the iterator with the lowest Check() cost, push it to the front, repeat.
|
|
for subIts.Len() != 0 {
|
|
var best *list.Element
|
|
bestCost := int64(1 << 62)
|
|
for e := subIts.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
rootStats := it.GetStats()
|
|
projectedCost := rootStats.CheckCost
|
|
if projectedCost < bestCost {
|
|
best = e
|
|
bestCost = projectedCost
|
|
}
|
|
}
|
|
out.PushBack(best.Value)
|
|
subIts.Remove(best)
|
|
}
|
|
|
|
and.checkList = out
|
|
}
|
|
|
|
// If we're replacing ourselves by a single iterator, we need to grab the
|
|
// result tags from the iterators that, while still valid and would hold
|
|
// the same values as this and, are not going to stay.
|
|
// getSubTags() returns a map of the tags for all the subiterators.
|
|
func (and *AndIterator) getSubTags() map[string]bool {
|
|
subs := and.GetSubIterators()
|
|
tags := make(map[string]bool)
|
|
for e := subs.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
for _, tag := range it.Tags() {
|
|
tags[tag] = true
|
|
}
|
|
}
|
|
for _, tag := range and.Tags() {
|
|
tags[tag] = true
|
|
}
|
|
return tags
|
|
}
|
|
|
|
// moveTagsTo() gets the tags for all of the And's subiterators and the
|
|
// And itself, and moves them to `out`.
|
|
func moveTagsTo(out Iterator, and *AndIterator) {
|
|
tagmap := and.getSubTags()
|
|
for _, tag := range out.Tags() {
|
|
if tagmap[tag] {
|
|
delete(tagmap, tag)
|
|
}
|
|
}
|
|
for k, _ := range tagmap {
|
|
out.AddTag(k)
|
|
}
|
|
}
|
|
|
|
// optimizeSubIterators(l) takes a list of iterators and calls Optimize() on all
|
|
// of them. It returns two lists -- the first contains the same list as l, where
|
|
// any replacements are made by Optimize() and the second contains the originals
|
|
// which were replaced.
|
|
func optimizeSubIterators(l *list.List) *list.List {
|
|
itList := list.New()
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
newIt, change := it.Optimize()
|
|
if change {
|
|
itList.PushBack(newIt)
|
|
} else {
|
|
itList.PushBack(it.Clone())
|
|
}
|
|
}
|
|
return itList
|
|
}
|
|
|
|
// Check a list of iterators for any Null iterators.
|
|
func hasAnyNullIterators(l *list.List) bool {
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
if it.Type() == "null" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// There are two "not-useful" iterators -- namely "null" which returns
|
|
// nothing, and "all" which returns everything. Particularly, we want
|
|
// to see if we're intersecting with a bunch of "all" iterators, and,
|
|
// if we are, then we have only one useful iterator.
|
|
func hasOneUsefulIterator(l *list.List) Iterator {
|
|
usefulCount := 0
|
|
var usefulIt Iterator
|
|
for e := l.Front(); e != nil; e = e.Next() {
|
|
it := e.Value.(Iterator)
|
|
switch it.Type() {
|
|
case "null", "all":
|
|
continue
|
|
case "optional":
|
|
// Optional is weird -- it's not useful, but we can't optimize
|
|
// away from it. Therefore, we skip this optimization
|
|
// if we see one.
|
|
return nil
|
|
default:
|
|
usefulCount++
|
|
usefulIt = it
|
|
}
|
|
}
|
|
|
|
if usefulCount == 1 {
|
|
return usefulIt
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// and.GetStats() lives here in and-iterator-optimize.go because it may
|
|
// in the future return different statistics based on how it is optimized.
|
|
// For now, however, it's pretty static.
|
|
func (and *AndIterator) GetStats() *IteratorStats {
|
|
primaryStats := and.primaryIt.GetStats()
|
|
CheckCost := primaryStats.CheckCost
|
|
NextCost := primaryStats.NextCost
|
|
Size := primaryStats.Size
|
|
for _, it := range and.internalIterators {
|
|
stats := it.GetStats()
|
|
NextCost += stats.CheckCost
|
|
CheckCost += stats.CheckCost
|
|
if Size > stats.Size {
|
|
Size = stats.Size
|
|
}
|
|
}
|
|
return &IteratorStats{
|
|
CheckCost: CheckCost,
|
|
NextCost: NextCost,
|
|
Size: Size,
|
|
}
|
|
|
|
}
|