explain logging and weight fixing

This commit is contained in:
Barak Michener 2014-08-16 05:19:16 -04:00
parent e1e95b9686
commit e453385d5e
10 changed files with 95 additions and 14 deletions

View file

@ -183,6 +183,9 @@ type IteratorStats struct {
ContainsCost int64
NextCost int64
Size int64
Next int64
Contains int64
ContainsNext int64
}
// Type enumerates the set of Iterator types.
@ -250,6 +253,24 @@ func (t Type) String() string {
return types[t]
}
// StatsContainer is a snapshot of one iterator's statistics together with the
// snapshots of its sub-iterators, forming a tree that mirrors the iterator tree.
// It is built by DumpStats.
type StatsContainer struct {
// The iterator's own statistics, as returned by its Stats() method.
IteratorStats
// Kind is the string form of the iterator's Type().
Kind string
// Uid is the value returned by the iterator's UID() method.
Uid uint64
// SubIts holds one container per entry of the iterator's SubIterators(),
// in the same order; it is left nil when there are none.
SubIts []StatsContainer
}
// DumpStats walks the iterator tree rooted at it and returns a StatsContainer
// describing it: the iterator's Stats(), the name of its Type(), its UID(), and,
// recursively, the same information for every iterator in SubIterators().
func DumpStats(it Iterator) StatsContainer {
	container := StatsContainer{
		IteratorStats: it.Stats(),
		Kind:          it.Type().String(),
		Uid:           it.UID(),
	}
	// SubIts is only appended to, so it stays nil for iterators without children.
	for _, child := range it.SubIterators() {
		container.SubIts = append(container.SubIts, DumpStats(child))
	}
	return container
}
// Utility logging functions for when Next or Contains is called on an iterator, as
// well as what they return. Highly useful for tracing the execution path of a query.
func ContainsLogIn(it Iterator, val Value) {

View file

@ -36,6 +36,7 @@ type Int64 struct {
max, min int64
at int64
result graph.Value
runstats graph.IteratorStats
}
// Creates a new Int64 with the given range.
@ -89,6 +90,7 @@ func (it *Int64) DebugString(indent int) string {
// Return the next integer, and mark it as the result.
func (it *Int64) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
if it.at == -1 {
return graph.NextLogOut(it, nil, false)
}
@ -130,6 +132,7 @@ func (it *Int64) Size() (int64, bool) {
// within the range, assuming the value is an int64.
func (it *Int64) Contains(tsv graph.Value) bool {
graph.ContainsLogIn(it, tsv)
it.runstats.Contains += 1
v := tsv.(int64)
if it.min <= v && v <= it.max {
it.result = v
@ -153,5 +156,7 @@ func (it *Int64) Stats() graph.IteratorStats {
ContainsCost: 1,
NextCost: 1,
Size: s,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
}
}

View file

@ -32,6 +32,7 @@ type And struct {
primaryIt graph.Iterator
checkList []graph.Iterator
result graph.Value
runstats graph.IteratorStats
}
// Creates a new And iterator.
@ -158,6 +159,7 @@ func (it *And) AddSubIterator(sub graph.Iterator) {
// is therefore very important.
func (it *And) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
for graph.Next(it.primaryIt) {
curr := it.primaryIt.Result()
if it.subItsContain(curr, nil) {
@ -211,6 +213,7 @@ func (it *And) checkContainsList(val graph.Value, lastResult graph.Value) bool {
// Check a value against the entire iterator, in order.
func (it *And) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
lastResult := it.result
if it.checkList != nil {
return it.checkContainsList(val, lastResult)

View file

@ -17,6 +17,8 @@ package iterator
import (
"sort"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
)
@ -68,7 +70,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
// And now, without changing any of the iterators, we reorder them. it_list is
// now a permutation of itself, but the contents are unchanged.
its = optimizeOrder(its)
its = it.optimizeOrder(its)
its = materializeIts(its)
@ -87,6 +89,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
newAnd.tags.CopyFrom(it)
newAnd.optimizeContains()
glog.V(3).Infoln(it.UID(), "became", newAnd.UID())
// And close ourselves but not our subiterators -- some may still be alive in
// the new And (they were unchanged upon calling Optimize() on them, at the
@ -133,7 +136,7 @@ func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator {
// optimizeOrder(l) takes a list and returns a list, containing the same contents
// but with a new ordering, however it wishes.
func optimizeOrder(its []graph.Iterator) []graph.Iterator {
func (it *And) optimizeOrder(its []graph.Iterator) []graph.Iterator {
var (
// bad contains iterators that can't be (efficiently) nexted, such as
// graph.Optional or graph.Not. Separate them out and tack them on at the end.
@ -146,29 +149,35 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
// Total cost is defined as the Next()ed iterator's cost to Next() out
// all of its contents, and to Contains() each of those against everyone
// else.
for _, it := range its {
if _, canNext := it.(graph.Nexter); !canNext {
bad = append(bad, it)
for _, root := range its {
if _, canNext := root.(graph.Nexter); !canNext {
bad = append(bad, root)
continue
}
rootStats := it.Stats()
rootStats := root.Stats()
cost := rootStats.NextCost
for _, f := range its {
if _, canNext := it.(graph.Nexter); !canNext {
if _, canNext := f.(graph.Nexter); !canNext {
continue
}
if f == it {
if f == root {
continue
}
stats := f.Stats()
cost += stats.ContainsCost
cost += stats.ContainsCost * (rootStats.Size / (stats.Size + 1))
}
cost *= rootStats.Size
if glog.V(3) {
glog.V(3).Infoln("And:", it.UID(), "Root:", root.UID(), "Total Cost:", cost, "Best:", bestCost)
}
if cost < bestCost {
best = it
best = root
bestCost = cost
}
}
if glog.V(3) {
glog.V(3).Infoln("And:", it.UID(), "Choosing:", best.UID(), "Best:", bestCost)
}
// TODO(barakmich): Optimization of order need not stop here. Picking a smart
// Contains() order based on probability of getting a false Contains() first is
@ -320,7 +329,7 @@ func (it *And) Stats() graph.IteratorStats {
Size := primaryStats.Size
for _, sub := range it.internalIterators {
stats := sub.Stats()
NextCost += stats.ContainsCost
NextCost += stats.ContainsCost * (primaryStats.Size / (stats.Size + 1))
ContainsCost += stats.ContainsCost
if Size > stats.Size {
Size = stats.Size
@ -330,6 +339,8 @@ func (it *And) Stats() graph.IteratorStats {
ContainsCost: ContainsCost * 2,
NextCost: NextCost,
Size: Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
}
}

View file

@ -108,9 +108,10 @@ func (it *Fixed) DebugString(indent int) string {
if len(it.values) > 0 {
value = fmt.Sprint(it.values[0])
}
return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)",
return fmt.Sprintf("%s(%s %d tags: %s Size: %d id0: %d)",
strings.Repeat(" ", indent),
it.Type(),
it.UID(),
it.tags.Fixed(),
len(it.values),
value,

View file

@ -54,6 +54,7 @@ type HasA struct {
dir quad.Direction
resultIt graph.Iterator
result graph.Value
runstats graph.IteratorStats
}
// Construct a new HasA iterator, given the triple subiterator, and the triple
@ -143,6 +144,7 @@ func (it *HasA) DebugString(indent int) string {
// and then Next() values out of that iterator and Contains() them against our subiterator.
func (it *HasA) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
if glog.V(4) {
glog.V(4).Infoln("Id is", it.ts.NameOf(val))
}
@ -159,6 +161,7 @@ func (it *HasA) Contains(val graph.Value) bool {
// another match is made.
func (it *HasA) NextContains() bool {
for graph.Next(it.resultIt) {
it.runstats.ContainsNext += 1
link := it.resultIt.Result()
if glog.V(4) {
glog.V(4).Infoln("Quad is", it.ts.Quad(link))
@ -193,6 +196,7 @@ func (it *HasA) NextPath() bool {
// pull our direction out of it, and return that.
func (it *HasA) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
if it.resultIt != nil {
it.resultIt.Close()
}
@ -229,6 +233,9 @@ func (it *HasA) Stats() graph.IteratorStats {
NextCost: tripleConstant + subitStats.NextCost,
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
Size: faninFactor * subitStats.Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
}
}
@ -244,5 +251,5 @@ func (it *HasA) Close() {
// Type reports the kind of this iterator, which is always graph.HasA.
func (it *HasA) Type() graph.Type {
	return graph.HasA
}
func (it *HasA) Size() (int64, bool) {
return 0, true
return it.Stats().Size, false
}

View file

@ -25,6 +25,10 @@ import (
// nextIteratorID is the UID that the next call to NextUID will hand out. It
// starts at 1, so the first UID returned is 1. It is initialized declaratively
// rather than in an init() function: package-level variables are initialized
// before any other code in the package runs, so no atomic store is needed.
// After initialization it must only be accessed through sync/atomic.
var nextIteratorID uint64 = 1

// NextUID returns a fresh identifier. Each call returns a value one greater
// than the previous call, starting at 1. The counter is advanced with an atomic
// add.
func NextUID() uint64 {
	// AddUint64 returns the value after incrementing; subtracting 1 yields the
	// value that was current before this call, which is the one reserved here.
	return atomic.AddUint64(&nextIteratorID, 1) - 1
}

View file

@ -48,6 +48,7 @@ type LinksTo struct {
dir quad.Direction
nextIt graph.Iterator
result graph.Value
runstats graph.IteratorStats
}
// Construct a new LinksTo iterator around a direction and a subiterator of
@ -118,6 +119,7 @@ func (it *LinksTo) DebugString(indent int) string {
// for the LinksTo.
func (it *LinksTo) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
node := it.ts.TripleDirection(val, it.dir)
if it.primaryIt.Contains(node) {
it.result = val
@ -155,7 +157,9 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) {
// Next()ing a LinksTo operates as described above.
func (it *LinksTo) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
if graph.Next(it.nextIt) {
it.runstats.ContainsNext += 1
it.result = it.nextIt.Result()
return graph.NextLogOut(it, it.nextIt, true)
}
@ -201,6 +205,9 @@ func (it *LinksTo) Stats() graph.IteratorStats {
NextCost: nextConstant + subitStats.NextCost,
ContainsCost: checkConstant + subitStats.ContainsCost,
Size: fanoutFactor * subitStats.Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
}
}

View file

@ -45,11 +45,13 @@ type Materialize struct {
tags graph.Tagger
containsMap map[graph.Value]int
values [][]result
actualSize int64
index int
subindex int
subIt graph.Iterator
hasRun bool
aborted bool
runstats graph.IteratorStats
}
func NewMaterialize(sub graph.Iterator) *Materialize {
@ -111,6 +113,7 @@ func (it *Materialize) Clone() graph.Iterator {
out.aborted = it.aborted
out.values = it.values
out.containsMap = it.containsMap
out.actualSize = it.actualSize
}
return out
}
@ -171,8 +174,10 @@ func (it *Materialize) Optimize() (graph.Iterator, bool) {
// Otherwise, guess based on the size of the subiterator.
func (it *Materialize) Size() (int64, bool) {
if it.hasRun && !it.aborted {
return int64(len(it.values)), true
glog.V(2).Infoln("returning size", it.actualSize)
return it.actualSize, true
}
glog.V(2).Infoln("bailing size", it.actualSize)
return it.subIt.Size()
}
@ -186,11 +191,14 @@ func (it *Materialize) Stats() graph.IteratorStats {
ContainsCost: overhead * subitStats.NextCost,
NextCost: overhead * subitStats.NextCost,
Size: size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
}
}
func (it *Materialize) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
if !it.hasRun {
it.materializeSet()
}
@ -208,6 +216,7 @@ func (it *Materialize) Next() bool {
func (it *Materialize) Contains(v graph.Value) bool {
graph.ContainsLogIn(it, v)
it.runstats.Contains += 1
if !it.hasRun {
it.materializeSet()
}
@ -264,10 +273,17 @@ func (it *Materialize) materializeSet() {
tags := make(map[string]graph.Value)
it.subIt.TagResults(tags)
it.values[index] = append(it.values[index], result{id: id, tags: tags})
it.actualSize += 1
for it.subIt.NextPath() {
i++
if i > abortMaterializeAt {
it.aborted = true
break
}
tags := make(map[string]graph.Value)
it.subIt.TagResults(tags)
it.values[index] = append(it.values[index], result{id: id, tags: tags})
it.actualSize += 1
}
}
if it.aborted {

View file

@ -15,6 +15,8 @@
package gremlin
import (
"encoding/json"
"github.com/barakmich/glog"
"github.com/robertkrimen/otto"
@ -278,5 +280,9 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) {
}
}
}
if glog.V(2) {
bytes, _ := json.MarshalIndent(graph.DumpStats(it), "", " ")
glog.V(2).Infoln(string(bytes))
}
it.Close()
}