diff --git a/graph/iterator.go b/graph/iterator.go index 972f334..e7c2ad8 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -153,6 +153,19 @@ func Next(it Iterator) (Value, bool) { return nil, false } +// Height is a convenience function to measure the height of an iterator tree. +func Height(it Iterator) int { + subs := it.SubIterators() + maxDepth := 0 + for _, sub := range subs { + h := Height(sub) + if h > maxDepth { + maxDepth = h + } + } + return maxDepth + 1 +} + // FixedIterator wraps iterators that are modifiable by addition of fixed value sets. type FixedIterator interface { Iterator @@ -180,6 +193,7 @@ const ( Fixed Not Optional + Materialize ) var ( @@ -200,6 +214,7 @@ var ( "fixed", "not", "optional", + "materialize", } ) diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index 92b6b41..f0adfad 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -70,6 +70,8 @@ func (it *And) Optimize() (graph.Iterator, bool) { // now a permutation of itself, but the contents are unchanged. its = optimizeOrder(its) + its = materializeIts(its) + // Okay! At this point we have an optimized order. // The easiest thing to do at this point is merely to create a new And iterator @@ -293,6 +295,21 @@ func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator { return nil } +func materializeIts(its []graph.Iterator) []graph.Iterator { + var out []graph.Iterator + for _, it := range its { + stats := it.Stats() + if stats.Size*stats.NextCost < stats.ContainsCost { + if graph.Height(it) > 10 { + out = append(out, NewMaterialize(it)) + continue + } + } + out = append(out, it) + } + return out +} + // and.Stats() lives here in and-iterator-optimize.go because it may // in the future return different statistics based on how it is optimized. // For now, however, it's pretty static. 
diff --git a/graph/iterator/materialize_iterator.go b/graph/iterator/materialize_iterator.go new file mode 100644 index 0000000..4180a0b --- /dev/null +++ b/graph/iterator/materialize_iterator.go @@ -0,0 +1,235 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iterator + +// A simple iterator that, the first time Contains() or Next() is called on it, materializes the whole subiterator, stores the results locally, and answers from that stored copy. Essentially a cache. 
+ +import ( + "fmt" + "strings" + + "github.com/google/cayley/graph" +) + +var abortMaterializeAt = 1000 + +type result struct { + id graph.Value + tags map[string]graph.Value +} + +type Materialize struct { + uid uint64 + tags graph.Tagger + containsMap map[graph.Value]int + values []result + lastIndex int + subIt graph.Iterator + hasRun bool + aborted bool +} + +func NewMaterialize(sub graph.Iterator) *Materialize { + return &Materialize{ + uid: NextUID(), + containsMap: make(map[graph.Value]int), + subIt: sub, + } +} + +func (it *Materialize) UID() uint64 { + return it.uid +} + +func (it *Materialize) Reset() { + it.subIt.Reset() + it.lastIndex = 0 +} + +func (it *Materialize) Close() { + it.subIt.Close() + it.containsMap = nil + it.values = nil + it.hasRun = false +} + +func (it *Materialize) Tagger() *graph.Tagger { + return &it.tags +} + +func (it *Materialize) TagResults(dst map[string]graph.Value) { + if !it.hasRun { + return + } + for _, tag := range it.tags.Tags() { + dst[tag] = it.Result() + } + + for tag, value := range it.values[it.lastIndex].tags { + dst[tag] = value + } +} + +func (it *Materialize) Clone() graph.Iterator { + out := NewMaterialize(it.subIt.Clone()) + out.tags.CopyFrom(it) + return out +} + +// Print some information about the iterator. +func (it *Materialize) DebugString(indent int) string { + return fmt.Sprintf("%s(%s tags: %s Size: %d\n%s)", + strings.Repeat(" ", indent), + it.Type(), + it.tags.Tags(), + len(it.values), + it.subIt.DebugString(indent+4), + ) +} + +// Register this iterator as a Materialize iterator. 
+func (it *Materialize) Type() graph.Type { return graph.Materialize } + +// DEPRECATED +func (it *Materialize) ResultTree() *graph.ResultTree { + tree := graph.NewResultTree(it.Result()) + tree.AddSubtree(it.subIt.ResultTree()) + return tree +} + +func (it *Materialize) Result() graph.Value { + if it.lastIndex+1 > len(it.values) { + return nil + } + return it.values[it.lastIndex].id +} + +func (it *Materialize) SubIterators() []graph.Iterator { + return []graph.Iterator{it.subIt} +} + +func (it *Materialize) Optimize() (graph.Iterator, bool) { + newSub, changed := it.subIt.Optimize() + if changed { + it.subIt = newSub + if it.subIt.Type() == graph.Null { + return it.subIt, true + } + } + return it, false +} + +// Size is the number of values stored, if we've got them all. +// Otherwise, guess based on the size of the subiterator. +func (it *Materialize) Size() (int64, bool) { + if it.hasRun { + return int64(len(it.values)), true + } + return it.subIt.Size() +} + +// The entire point of Materialize is to amortize the cost by +// putting it all up front. 
+func (it *Materialize) Stats() graph.IteratorStats { + overhead := int64(2) + size, _ := it.Size() + subitStats := it.subIt.Stats() + return graph.IteratorStats{ + ContainsCost: overhead * subitStats.NextCost, + NextCost: overhead * subitStats.NextCost, + Size: size, + } +} + +func (it *Materialize) Next() (graph.Value, bool) { + if !it.hasRun { + it.materializeSet() + } + if it.aborted { + return graph.Next(it.subIt) + } + + lastVal := it.Result() + for it.lastIndex < len(it.values) { + it.lastIndex++ + if it.Result() != lastVal { + return it.Result(), true + } + } + return nil, false +} + +func (it *Materialize) Contains(v graph.Value) bool { + if !it.hasRun { + it.materializeSet() + } + if it.aborted { + return it.subIt.Contains(v) + } + if i, ok := it.containsMap[v]; ok { + it.lastIndex = i + return true + } + return false +} + +func (it *Materialize) NextResult() bool { + if !it.hasRun { + it.materializeSet() + } + if it.aborted { + return it.subIt.NextResult() + } + + i := it.lastIndex + 1 + if i == len(it.values) { + return false + } + if it.Result() == it.values[i].id { + it.lastIndex = i + return true + } + return false +} + +func (it *Materialize) materializeSet() { + i := 0 + for { + val, ok := graph.Next(it.subIt) + if !ok { + break + } + i += 1 + if i > abortMaterializeAt { + it.aborted = true + break + } + tags := make(map[string]graph.Value) + it.subIt.TagResults(tags) + it.containsMap[val] = len(it.values) + it.values = append(it.values, result{id: val, tags: tags}) + for it.subIt.NextResult() == true { + tags := make(map[string]graph.Value) + it.subIt.TagResults(tags) + it.values = append(it.values, result{id: val, tags: tags}) + } + } + if it.aborted { + it.values = nil + it.containsMap = nil + it.subIt.Reset() + } + it.hasRun = true +}