diff --git a/graph/iterator.go b/graph/iterator.go index 96f1587..ca17cb5 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -183,6 +183,9 @@ type IteratorStats struct { ContainsCost int64 NextCost int64 Size int64 + Next int64 + Contains int64 + ContainsNext int64 } // Type enumerates the set of Iterator types. @@ -250,6 +253,24 @@ func (t Type) String() string { return types[t] } +type StatsContainer struct { + IteratorStats + Kind string + Uid uint64 + SubIts []StatsContainer +} + +func DumpStats(it Iterator) StatsContainer { + var out StatsContainer + out.IteratorStats = it.Stats() + out.Kind = it.Type().String() + out.Uid = it.UID() + for _, sub := range it.SubIterators() { + out.SubIts = append(out.SubIts, DumpStats(sub)) + } + return out +} + // Utility logging functions for when an iterator gets called Next upon, or Contains upon, as // well as what they return. Highly useful for tracing the execution path of a query. func ContainsLogIn(it Iterator, val Value) { diff --git a/graph/iterator/all_iterator.go b/graph/iterator/all_iterator.go index 80e471c..0a890cf 100644 --- a/graph/iterator/all_iterator.go +++ b/graph/iterator/all_iterator.go @@ -36,6 +36,7 @@ type Int64 struct { max, min int64 at int64 result graph.Value + runstats graph.IteratorStats } // Creates a new Int64 with the given range. @@ -89,6 +90,7 @@ func (it *Int64) DebugString(indent int) string { // Return the next integer, and mark it as the result. func (it *Int64) Next() bool { graph.NextLogIn(it) + it.runstats.Next += 1 if it.at == -1 { return graph.NextLogOut(it, nil, false) } @@ -130,6 +132,7 @@ func (it *Int64) Size() (int64, bool) { // withing the range, assuming the value is an int64. func (it *Int64) Contains(tsv graph.Value) bool { graph.ContainsLogIn(it, tsv) + it.runstats.Contains += 1 v := tsv.(int64) if it.min <= v && v <= it.max { it.result = v @@ -153,5 +156,7 @@ func (it *Int64) Stats() graph.IteratorStats { ContainsCost: 1, NextCost: 1, Size: s, + Next: it.runstats.Next, + Contains: it.runstats.Contains, } } diff --git a/graph/iterator/and_iterator.go b/graph/iterator/and_iterator.go index c18cd70..a585cd9 100644 --- a/graph/iterator/and_iterator.go +++ b/graph/iterator/and_iterator.go @@ -32,6 +32,7 @@ type And struct { primaryIt graph.Iterator checkList []graph.Iterator result graph.Value + runstats graph.IteratorStats } // Creates a new And iterator. @@ -158,6 +159,7 @@ func (it *And) AddSubIterator(sub graph.Iterator) { // is therefore very important. func (it *And) Next() bool { graph.NextLogIn(it) + it.runstats.Next += 1 for graph.Next(it.primaryIt) { curr := it.primaryIt.Result() if it.subItsContain(curr, nil) { @@ -211,6 +213,7 @@ func (it *And) checkContainsList(val graph.Value, lastResult graph.Value) bool { // Check a value against the entire iterator, in order. func (it *And) Contains(val graph.Value) bool { graph.ContainsLogIn(it, val) + it.runstats.Contains += 1 lastResult := it.result if it.checkList != nil { return it.checkContainsList(val, lastResult) diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index cb8030a7..f456926 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -17,6 +17,8 @@ package iterator import ( "sort" + "github.com/barakmich/glog" + "github.com/google/cayley/graph" ) @@ -68,7 +70,7 @@ func (it *And) Optimize() (graph.Iterator, bool) { // And now, without changing any of the iterators, we reorder them. it_list is // now a permutation of itself, but the contents are unchanged. - its = optimizeOrder(its) + its = it.optimizeOrder(its) its = materializeIts(its) @@ -87,6 +89,7 @@ func (it *And) Optimize() (graph.Iterator, bool) { newAnd.tags.CopyFrom(it) newAnd.optimizeContains() + glog.V(3).Infoln(it.UID(), "became", newAnd.UID()) // And close ourselves but not our subiterators -- some may still be alive in // the new And (they were unchanged upon calling Optimize() on them, at the @@ -133,7 +136,7 @@ func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator { // optimizeOrder(l) takes a list and returns a list, containing the same contents // but with a new ordering, however it wishes. -func optimizeOrder(its []graph.Iterator) []graph.Iterator { +func (it *And) optimizeOrder(its []graph.Iterator) []graph.Iterator { var ( // bad contains iterators that can't be (efficiently) nexted, such as // graph.Optional or graph.Not. Separate them out and tack them on at the end. @@ -146,29 +149,35 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator { // Total cost is defined as The Next()ed iterator's cost to Next() out // all of it's contents, and to Contains() each of those against everyone // else. - for _, it := range its { - if _, canNext := it.(graph.Nexter); !canNext { - bad = append(bad, it) + for _, root := range its { + if _, canNext := root.(graph.Nexter); !canNext { + bad = append(bad, root) continue } - rootStats := it.Stats() + rootStats := root.Stats() cost := rootStats.NextCost for _, f := range its { - if _, canNext := it.(graph.Nexter); !canNext { + if _, canNext := f.(graph.Nexter); !canNext { continue } - if f == it { + if f == root { continue } stats := f.Stats() - cost += stats.ContainsCost + cost += stats.ContainsCost * (rootStats.Size / (stats.Size + 1)) } cost *= rootStats.Size + if glog.V(3) { + glog.V(3).Infoln("And:", it.UID(), "Root:", root.UID(), "Total Cost:", cost, "Best:", bestCost) + } if cost < bestCost { - best = it + best = root bestCost = cost } } + if glog.V(3) { + glog.V(3).Infoln("And:", it.UID(), "Choosing:", best.UID(), "Best:", bestCost) + } // TODO(barakmich): Optimization of order need not stop here. Picking a smart // Contains() order based on probability of getting a false Contains() first is @@ -320,7 +329,7 @@ func (it *And) Stats() graph.IteratorStats { Size := primaryStats.Size for _, sub := range it.internalIterators { stats := sub.Stats() - NextCost += stats.ContainsCost + NextCost += stats.ContainsCost * (primaryStats.Size / (stats.Size + 1)) ContainsCost += stats.ContainsCost if Size > stats.Size { Size = stats.Size @@ -330,6 +339,8 @@ func (it *And) Stats() graph.IteratorStats { ContainsCost: ContainsCost * 2, NextCost: NextCost, Size: Size, + Next: it.runstats.Next, + Contains: it.runstats.Contains, } } diff --git a/graph/iterator/fixed_iterator.go b/graph/iterator/fixed_iterator.go index a2d57b1..8ecda34 100644 --- a/graph/iterator/fixed_iterator.go +++ b/graph/iterator/fixed_iterator.go @@ -108,9 +108,10 @@ func (it *Fixed) DebugString(indent int) string { if len(it.values) > 0 { value = fmt.Sprint(it.values[0]) } - return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", + return fmt.Sprintf("%s(%s %d tags: %s Size: %d id0: %d)", strings.Repeat(" ", indent), it.Type(), + it.UID(), it.tags.Fixed(), len(it.values), value, diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index b88f58b..b8aae90 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -54,6 +54,7 @@ type HasA struct { dir quad.Direction resultIt graph.Iterator result graph.Value + runstats graph.IteratorStats } // Construct a new HasA iterator, given the triple subiterator, and the triple @@ -143,6 +144,7 @@ func (it *HasA) DebugString(indent int) string { // and then Next() values out of that iterator and Contains() them against our subiterator. func (it *HasA) Contains(val graph.Value) bool { graph.ContainsLogIn(it, val) + it.runstats.Contains += 1 if glog.V(4) { glog.V(4).Infoln("Id is", it.ts.NameOf(val)) } @@ -159,6 +161,7 @@ func (it *HasA) Contains(val graph.Value) bool { // another match is made. func (it *HasA) NextContains() bool { for graph.Next(it.resultIt) { + it.runstats.ContainsNext += 1 link := it.resultIt.Result() if glog.V(4) { glog.V(4).Infoln("Quad is", it.ts.Quad(link)) @@ -193,6 +196,7 @@ func (it *HasA) NextPath() bool { // pull our direction out of it, and return that. func (it *HasA) Next() bool { graph.NextLogIn(it) + it.runstats.Next += 1 if it.resultIt != nil { it.resultIt.Close() } @@ -229,6 +233,9 @@ func (it *HasA) Stats() graph.IteratorStats { NextCost: tripleConstant + subitStats.NextCost, ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost, Size: faninFactor * subitStats.Size, + Next: it.runstats.Next, + Contains: it.runstats.Contains, + ContainsNext: it.runstats.ContainsNext, } } @@ -244,5 +251,5 @@ func (it *HasA) Close() { func (it *HasA) Type() graph.Type { return graph.HasA } func (it *HasA) Size() (int64, bool) { - return 0, true + return it.Stats().Size, false } diff --git a/graph/iterator/iterator.go b/graph/iterator/iterator.go index 67d8a80..ab36b7b 100644 --- a/graph/iterator/iterator.go +++ b/graph/iterator/iterator.go @@ -25,6 +25,10 @@ import ( var nextIteratorID uint64 +func init() { + atomic.StoreUint64(&nextIteratorID, 1) +} + func NextUID() uint64 { return atomic.AddUint64(&nextIteratorID, 1) - 1 } diff --git a/graph/iterator/linksto_iterator.go b/graph/iterator/linksto_iterator.go index 517fc11..d13c765 100644 --- a/graph/iterator/linksto_iterator.go +++ b/graph/iterator/linksto_iterator.go @@ -48,6 +48,7 @@ type LinksTo struct { dir quad.Direction nextIt graph.Iterator result graph.Value + runstats graph.IteratorStats } // Construct a new LinksTo iterator around a direction and a subiterator of @@ -118,6 +119,7 @@ func (it *LinksTo) DebugString(indent int) string { // for the LinksTo. func (it *LinksTo) Contains(val graph.Value) bool { graph.ContainsLogIn(it, val) + it.runstats.Contains += 1 node := it.ts.TripleDirection(val, it.dir) if it.primaryIt.Contains(node) { it.result = val @@ -155,7 +157,9 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) { // Next()ing a LinksTo operates as described above. func (it *LinksTo) Next() bool { graph.NextLogIn(it) + it.runstats.Next += 1 if graph.Next(it.nextIt) { + it.runstats.ContainsNext += 1 it.result = it.nextIt.Result() return graph.NextLogOut(it, it.nextIt, true) } @@ -201,6 +205,9 @@ func (it *LinksTo) Stats() graph.IteratorStats { NextCost: nextConstant + subitStats.NextCost, ContainsCost: checkConstant + subitStats.ContainsCost, Size: fanoutFactor * subitStats.Size, + Next: it.runstats.Next, + Contains: it.runstats.Contains, + ContainsNext: it.runstats.ContainsNext, } } diff --git a/graph/iterator/materialize_iterator.go b/graph/iterator/materialize_iterator.go index 8528f4f..46f2d57 100644 --- a/graph/iterator/materialize_iterator.go +++ b/graph/iterator/materialize_iterator.go @@ -45,11 +45,13 @@ type Materialize struct { tags graph.Tagger containsMap map[graph.Value]int values [][]result + actualSize int64 index int subindex int subIt graph.Iterator hasRun bool aborted bool + runstats graph.IteratorStats } func NewMaterialize(sub graph.Iterator) *Materialize { @@ -111,6 +113,7 @@ func (it *Materialize) Clone() graph.Iterator { out.aborted = it.aborted out.values = it.values out.containsMap = it.containsMap + out.actualSize = it.actualSize } return out } @@ -171,8 +174,10 @@ func (it *Materialize) Optimize() (graph.Iterator, bool) { // Otherwise, guess based on the size of the subiterator. func (it *Materialize) Size() (int64, bool) { if it.hasRun && !it.aborted { - return int64(len(it.values)), true + glog.V(2).Infoln("returning size", it.actualSize) + return it.actualSize, true } + glog.V(2).Infoln("bailing size", it.actualSize) return it.subIt.Size() } @@ -186,11 +191,14 @@ func (it *Materialize) Stats() graph.IteratorStats { ContainsCost: overhead * subitStats.NextCost, NextCost: overhead * subitStats.NextCost, Size: size, + Next: it.runstats.Next, + Contains: it.runstats.Contains, } } func (it *Materialize) Next() bool { graph.NextLogIn(it) + it.runstats.Next += 1 if !it.hasRun { it.materializeSet() } @@ -208,6 +216,7 @@ func (it *Materialize) Next() bool { func (it *Materialize) Contains(v graph.Value) bool { graph.ContainsLogIn(it, v) + it.runstats.Contains += 1 if !it.hasRun { it.materializeSet() } @@ -264,10 +273,17 @@ func (it *Materialize) materializeSet() { tags := make(map[string]graph.Value) it.subIt.TagResults(tags) it.values[index] = append(it.values[index], result{id: id, tags: tags}) + it.actualSize += 1 for it.subIt.NextPath() { + i++ + if i > abortMaterializeAt { + it.aborted = true + break + } tags := make(map[string]graph.Value) it.subIt.TagResults(tags) it.values[index] = append(it.values[index], result{id: id, tags: tags}) + it.actualSize += 1 } } if it.aborted { diff --git a/query/gremlin/finals.go b/query/gremlin/finals.go index bdd7de2..a82da13 100644 --- a/query/gremlin/finals.go +++ b/query/gremlin/finals.go @@ -15,6 +15,8 @@ package gremlin import ( + "encoding/json" + "github.com/barakmich/glog" "github.com/robertkrimen/otto" @@ -278,5 +280,9 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) { } } } + if glog.V(2) { + bytes, _ := json.MarshalIndent(graph.DumpStats(it), "", " ") + glog.V(2).Infoln(string(bytes)) + } it.Close() }