explain logging and weight fixing

This commit is contained in:
Barak Michener 2014-08-16 05:19:16 -04:00
parent e1e95b9686
commit e453385d5e
10 changed files with 95 additions and 14 deletions

View file

@ -183,6 +183,9 @@ type IteratorStats struct {
ContainsCost int64 ContainsCost int64
NextCost int64 NextCost int64
Size int64 Size int64
Next int64
Contains int64
ContainsNext int64
} }
// Type enumerates the set of Iterator types. // Type enumerates the set of Iterator types.
@ -250,6 +253,24 @@ func (t Type) String() string {
return types[t] return types[t]
} }
// StatsContainer is a snapshot of one iterator's statistics together with the
// snapshots of its subiterators, so a whole iterator tree can be reported as a
// single nested value. It is the type returned by DumpStats.
type StatsContainer struct {
// IteratorStats carries the values returned by the iterator's Stats() call.
IteratorStats
// Kind is the iterator's type name, as produced by Type().String().
Kind string
// Uid is the value returned by the iterator's UID() method.
Uid uint64
// SubIts holds one container per entry of SubIterators(), in the same order.
// It stays nil when the iterator reports no subiterators.
SubIts []StatsContainer
}
// DumpStats walks the iterator tree rooted at it and returns a StatsContainer
// describing it: the iterator's own Stats(), its type name, its UID, and, via
// recursion, one nested container for each of its subiterators.
//
// The SubIts field of the result is left nil for iterators without
// subiterators.
func DumpStats(it Iterator) StatsContainer {
	container := StatsContainer{
		IteratorStats: it.Stats(),
		Kind:          it.Type().String(),
		Uid:           it.UID(),
	}
	children := it.SubIterators()
	for i := range children {
		// Depth-first: each child is fully dumped before the next one is visited.
		container.SubIts = append(container.SubIts, DumpStats(children[i]))
	}
	return container
}
// Utility logging functions for when an iterator gets called Next upon, or Contains upon, as // Utility logging functions for when an iterator gets called Next upon, or Contains upon, as
// well as what they return. Highly useful for tracing the execution path of a query. // well as what they return. Highly useful for tracing the execution path of a query.
func ContainsLogIn(it Iterator, val Value) { func ContainsLogIn(it Iterator, val Value) {

View file

@ -36,6 +36,7 @@ type Int64 struct {
max, min int64 max, min int64
at int64 at int64
result graph.Value result graph.Value
runstats graph.IteratorStats
} }
// Creates a new Int64 with the given range. // Creates a new Int64 with the given range.
@ -89,6 +90,7 @@ func (it *Int64) DebugString(indent int) string {
// Return the next integer, and mark it as the result. // Return the next integer, and mark it as the result.
func (it *Int64) Next() bool { func (it *Int64) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
it.runstats.Next += 1
if it.at == -1 { if it.at == -1 {
return graph.NextLogOut(it, nil, false) return graph.NextLogOut(it, nil, false)
} }
@ -130,6 +132,7 @@ func (it *Int64) Size() (int64, bool) {
// within the range, assuming the value is an int64. // within the range, assuming the value is an int64.
func (it *Int64) Contains(tsv graph.Value) bool { func (it *Int64) Contains(tsv graph.Value) bool {
graph.ContainsLogIn(it, tsv) graph.ContainsLogIn(it, tsv)
it.runstats.Contains += 1
v := tsv.(int64) v := tsv.(int64)
if it.min <= v && v <= it.max { if it.min <= v && v <= it.max {
it.result = v it.result = v
@ -153,5 +156,7 @@ func (it *Int64) Stats() graph.IteratorStats {
ContainsCost: 1, ContainsCost: 1,
NextCost: 1, NextCost: 1,
Size: s, Size: s,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
} }
} }

View file

@ -32,6 +32,7 @@ type And struct {
primaryIt graph.Iterator primaryIt graph.Iterator
checkList []graph.Iterator checkList []graph.Iterator
result graph.Value result graph.Value
runstats graph.IteratorStats
} }
// Creates a new And iterator. // Creates a new And iterator.
@ -158,6 +159,7 @@ func (it *And) AddSubIterator(sub graph.Iterator) {
// is therefore very important. // is therefore very important.
func (it *And) Next() bool { func (it *And) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
it.runstats.Next += 1
for graph.Next(it.primaryIt) { for graph.Next(it.primaryIt) {
curr := it.primaryIt.Result() curr := it.primaryIt.Result()
if it.subItsContain(curr, nil) { if it.subItsContain(curr, nil) {
@ -211,6 +213,7 @@ func (it *And) checkContainsList(val graph.Value, lastResult graph.Value) bool {
// Check a value against the entire iterator, in order. // Check a value against the entire iterator, in order.
func (it *And) Contains(val graph.Value) bool { func (it *And) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val) graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
lastResult := it.result lastResult := it.result
if it.checkList != nil { if it.checkList != nil {
return it.checkContainsList(val, lastResult) return it.checkContainsList(val, lastResult)

View file

@ -17,6 +17,8 @@ package iterator
import ( import (
"sort" "sort"
"github.com/barakmich/glog"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
) )
@ -68,7 +70,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
// And now, without changing any of the iterators, we reorder them. it_list is // And now, without changing any of the iterators, we reorder them. it_list is
// now a permutation of itself, but the contents are unchanged. // now a permutation of itself, but the contents are unchanged.
its = optimizeOrder(its) its = it.optimizeOrder(its)
its = materializeIts(its) its = materializeIts(its)
@ -87,6 +89,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
newAnd.tags.CopyFrom(it) newAnd.tags.CopyFrom(it)
newAnd.optimizeContains() newAnd.optimizeContains()
glog.V(3).Infoln(it.UID(), "became", newAnd.UID())
// And close ourselves but not our subiterators -- some may still be alive in // And close ourselves but not our subiterators -- some may still be alive in
// the new And (they were unchanged upon calling Optimize() on them, at the // the new And (they were unchanged upon calling Optimize() on them, at the
@ -133,7 +136,7 @@ func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator {
// optimizeOrder(l) takes a list and returns a list, containing the same contents // optimizeOrder(l) takes a list and returns a list, containing the same contents
// but with a new ordering, however it wishes. // but with a new ordering, however it wishes.
func optimizeOrder(its []graph.Iterator) []graph.Iterator { func (it *And) optimizeOrder(its []graph.Iterator) []graph.Iterator {
var ( var (
// bad contains iterators that can't be (efficiently) nexted, such as // bad contains iterators that can't be (efficiently) nexted, such as
// graph.Optional or graph.Not. Separate them out and tack them on at the end. // graph.Optional or graph.Not. Separate them out and tack them on at the end.
@ -146,29 +149,35 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
// Total cost is defined as The Next()ed iterator's cost to Next() out // Total cost is defined as The Next()ed iterator's cost to Next() out
// all of its contents, and to Contains() each of those against everyone // all of its contents, and to Contains() each of those against everyone
// else. // else.
for _, it := range its { for _, root := range its {
if _, canNext := it.(graph.Nexter); !canNext { if _, canNext := root.(graph.Nexter); !canNext {
bad = append(bad, it) bad = append(bad, root)
continue continue
} }
rootStats := it.Stats() rootStats := root.Stats()
cost := rootStats.NextCost cost := rootStats.NextCost
for _, f := range its { for _, f := range its {
if _, canNext := it.(graph.Nexter); !canNext { if _, canNext := f.(graph.Nexter); !canNext {
continue continue
} }
if f == it { if f == root {
continue continue
} }
stats := f.Stats() stats := f.Stats()
cost += stats.ContainsCost cost += stats.ContainsCost * (rootStats.Size / (stats.Size + 1))
} }
cost *= rootStats.Size cost *= rootStats.Size
if glog.V(3) {
glog.V(3).Infoln("And:", it.UID(), "Root:", root.UID(), "Total Cost:", cost, "Best:", bestCost)
}
if cost < bestCost { if cost < bestCost {
best = it best = root
bestCost = cost bestCost = cost
} }
} }
if glog.V(3) {
glog.V(3).Infoln("And:", it.UID(), "Choosing:", best.UID(), "Best:", bestCost)
}
// TODO(barakmich): Optimization of order need not stop here. Picking a smart // TODO(barakmich): Optimization of order need not stop here. Picking a smart
// Contains() order based on probability of getting a false Contains() first is // Contains() order based on probability of getting a false Contains() first is
@ -320,7 +329,7 @@ func (it *And) Stats() graph.IteratorStats {
Size := primaryStats.Size Size := primaryStats.Size
for _, sub := range it.internalIterators { for _, sub := range it.internalIterators {
stats := sub.Stats() stats := sub.Stats()
NextCost += stats.ContainsCost NextCost += stats.ContainsCost * (primaryStats.Size / (stats.Size + 1))
ContainsCost += stats.ContainsCost ContainsCost += stats.ContainsCost
if Size > stats.Size { if Size > stats.Size {
Size = stats.Size Size = stats.Size
@ -330,6 +339,8 @@ func (it *And) Stats() graph.IteratorStats {
ContainsCost: ContainsCost * 2, ContainsCost: ContainsCost * 2,
NextCost: NextCost, NextCost: NextCost,
Size: Size, Size: Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
} }
} }

View file

@ -108,9 +108,10 @@ func (it *Fixed) DebugString(indent int) string {
if len(it.values) > 0 { if len(it.values) > 0 {
value = fmt.Sprint(it.values[0]) value = fmt.Sprint(it.values[0])
} }
return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", return fmt.Sprintf("%s(%s %d tags: %s Size: %d id0: %d)",
strings.Repeat(" ", indent), strings.Repeat(" ", indent),
it.Type(), it.Type(),
it.UID(),
it.tags.Fixed(), it.tags.Fixed(),
len(it.values), len(it.values),
value, value,

View file

@ -54,6 +54,7 @@ type HasA struct {
dir quad.Direction dir quad.Direction
resultIt graph.Iterator resultIt graph.Iterator
result graph.Value result graph.Value
runstats graph.IteratorStats
} }
// Construct a new HasA iterator, given the triple subiterator, and the triple // Construct a new HasA iterator, given the triple subiterator, and the triple
@ -143,6 +144,7 @@ func (it *HasA) DebugString(indent int) string {
// and then Next() values out of that iterator and Contains() them against our subiterator. // and then Next() values out of that iterator and Contains() them against our subiterator.
func (it *HasA) Contains(val graph.Value) bool { func (it *HasA) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val) graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
if glog.V(4) { if glog.V(4) {
glog.V(4).Infoln("Id is", it.ts.NameOf(val)) glog.V(4).Infoln("Id is", it.ts.NameOf(val))
} }
@ -159,6 +161,7 @@ func (it *HasA) Contains(val graph.Value) bool {
// another match is made. // another match is made.
func (it *HasA) NextContains() bool { func (it *HasA) NextContains() bool {
for graph.Next(it.resultIt) { for graph.Next(it.resultIt) {
it.runstats.ContainsNext += 1
link := it.resultIt.Result() link := it.resultIt.Result()
if glog.V(4) { if glog.V(4) {
glog.V(4).Infoln("Quad is", it.ts.Quad(link)) glog.V(4).Infoln("Quad is", it.ts.Quad(link))
@ -193,6 +196,7 @@ func (it *HasA) NextPath() bool {
// pull our direction out of it, and return that. // pull our direction out of it, and return that.
func (it *HasA) Next() bool { func (it *HasA) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
it.runstats.Next += 1
if it.resultIt != nil { if it.resultIt != nil {
it.resultIt.Close() it.resultIt.Close()
} }
@ -229,6 +233,9 @@ func (it *HasA) Stats() graph.IteratorStats {
NextCost: tripleConstant + subitStats.NextCost, NextCost: tripleConstant + subitStats.NextCost,
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost, ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
Size: faninFactor * subitStats.Size, Size: faninFactor * subitStats.Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
} }
} }
@ -244,5 +251,5 @@ func (it *HasA) Close() {
func (it *HasA) Type() graph.Type { return graph.HasA } func (it *HasA) Type() graph.Type { return graph.HasA }
func (it *HasA) Size() (int64, bool) { func (it *HasA) Size() (int64, bool) {
return 0, true return it.Stats().Size, false
} }

View file

@ -25,6 +25,10 @@ import (
var nextIteratorID uint64 var nextIteratorID uint64
// init seeds the iterator ID counter with 1 at package initialization, so the
// first NextUID call (which returns the incremented counter minus one) yields
// 1 rather than 0.
// NOTE(review): presumably 0 is being kept free as a "no iterator" value — confirm.
func init() {
atomic.StoreUint64(&nextIteratorID, 1)
}
func NextUID() uint64 { func NextUID() uint64 {
return atomic.AddUint64(&nextIteratorID, 1) - 1 return atomic.AddUint64(&nextIteratorID, 1) - 1
} }

View file

@ -48,6 +48,7 @@ type LinksTo struct {
dir quad.Direction dir quad.Direction
nextIt graph.Iterator nextIt graph.Iterator
result graph.Value result graph.Value
runstats graph.IteratorStats
} }
// Construct a new LinksTo iterator around a direction and a subiterator of // Construct a new LinksTo iterator around a direction and a subiterator of
@ -118,6 +119,7 @@ func (it *LinksTo) DebugString(indent int) string {
// for the LinksTo. // for the LinksTo.
func (it *LinksTo) Contains(val graph.Value) bool { func (it *LinksTo) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val) graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
node := it.ts.TripleDirection(val, it.dir) node := it.ts.TripleDirection(val, it.dir)
if it.primaryIt.Contains(node) { if it.primaryIt.Contains(node) {
it.result = val it.result = val
@ -155,7 +157,9 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) {
// Next()ing a LinksTo operates as described above. // Next()ing a LinksTo operates as described above.
func (it *LinksTo) Next() bool { func (it *LinksTo) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
it.runstats.Next += 1
if graph.Next(it.nextIt) { if graph.Next(it.nextIt) {
it.runstats.ContainsNext += 1
it.result = it.nextIt.Result() it.result = it.nextIt.Result()
return graph.NextLogOut(it, it.nextIt, true) return graph.NextLogOut(it, it.nextIt, true)
} }
@ -201,6 +205,9 @@ func (it *LinksTo) Stats() graph.IteratorStats {
NextCost: nextConstant + subitStats.NextCost, NextCost: nextConstant + subitStats.NextCost,
ContainsCost: checkConstant + subitStats.ContainsCost, ContainsCost: checkConstant + subitStats.ContainsCost,
Size: fanoutFactor * subitStats.Size, Size: fanoutFactor * subitStats.Size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
} }
} }

View file

@ -45,11 +45,13 @@ type Materialize struct {
tags graph.Tagger tags graph.Tagger
containsMap map[graph.Value]int containsMap map[graph.Value]int
values [][]result values [][]result
actualSize int64
index int index int
subindex int subindex int
subIt graph.Iterator subIt graph.Iterator
hasRun bool hasRun bool
aborted bool aborted bool
runstats graph.IteratorStats
} }
func NewMaterialize(sub graph.Iterator) *Materialize { func NewMaterialize(sub graph.Iterator) *Materialize {
@ -111,6 +113,7 @@ func (it *Materialize) Clone() graph.Iterator {
out.aborted = it.aborted out.aborted = it.aborted
out.values = it.values out.values = it.values
out.containsMap = it.containsMap out.containsMap = it.containsMap
out.actualSize = it.actualSize
} }
return out return out
} }
@ -171,8 +174,10 @@ func (it *Materialize) Optimize() (graph.Iterator, bool) {
// Otherwise, guess based on the size of the subiterator. // Otherwise, guess based on the size of the subiterator.
func (it *Materialize) Size() (int64, bool) { func (it *Materialize) Size() (int64, bool) {
if it.hasRun && !it.aborted { if it.hasRun && !it.aborted {
return int64(len(it.values)), true glog.V(2).Infoln("returning size", it.actualSize)
return it.actualSize, true
} }
glog.V(2).Infoln("bailing size", it.actualSize)
return it.subIt.Size() return it.subIt.Size()
} }
@ -186,11 +191,14 @@ func (it *Materialize) Stats() graph.IteratorStats {
ContainsCost: overhead * subitStats.NextCost, ContainsCost: overhead * subitStats.NextCost,
NextCost: overhead * subitStats.NextCost, NextCost: overhead * subitStats.NextCost,
Size: size, Size: size,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
} }
} }
func (it *Materialize) Next() bool { func (it *Materialize) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
it.runstats.Next += 1
if !it.hasRun { if !it.hasRun {
it.materializeSet() it.materializeSet()
} }
@ -208,6 +216,7 @@ func (it *Materialize) Next() bool {
func (it *Materialize) Contains(v graph.Value) bool { func (it *Materialize) Contains(v graph.Value) bool {
graph.ContainsLogIn(it, v) graph.ContainsLogIn(it, v)
it.runstats.Contains += 1
if !it.hasRun { if !it.hasRun {
it.materializeSet() it.materializeSet()
} }
@ -264,10 +273,17 @@ func (it *Materialize) materializeSet() {
tags := make(map[string]graph.Value) tags := make(map[string]graph.Value)
it.subIt.TagResults(tags) it.subIt.TagResults(tags)
it.values[index] = append(it.values[index], result{id: id, tags: tags}) it.values[index] = append(it.values[index], result{id: id, tags: tags})
it.actualSize += 1
for it.subIt.NextPath() { for it.subIt.NextPath() {
i++
if i > abortMaterializeAt {
it.aborted = true
break
}
tags := make(map[string]graph.Value) tags := make(map[string]graph.Value)
it.subIt.TagResults(tags) it.subIt.TagResults(tags)
it.values[index] = append(it.values[index], result{id: id, tags: tags}) it.values[index] = append(it.values[index], result{id: id, tags: tags})
it.actualSize += 1
} }
} }
if it.aborted { if it.aborted {

View file

@ -15,6 +15,8 @@
package gremlin package gremlin
import ( import (
"encoding/json"
"github.com/barakmich/glog" "github.com/barakmich/glog"
"github.com/robertkrimen/otto" "github.com/robertkrimen/otto"
@ -278,5 +280,9 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) {
} }
} }
} }
if glog.V(2) {
bytes, _ := json.MarshalIndent(graph.DumpStats(it), "", " ")
glog.V(2).Infoln(string(bytes))
}
it.Close() it.Close()
} }