explain logging and weight fixing
This commit is contained in:
parent
e1e95b9686
commit
e453385d5e
10 changed files with 95 additions and 14 deletions
|
|
@ -183,6 +183,9 @@ type IteratorStats struct {
|
|||
ContainsCost int64
|
||||
NextCost int64
|
||||
Size int64
|
||||
Next int64
|
||||
Contains int64
|
||||
ContainsNext int64
|
||||
}
|
||||
|
||||
// Type enumerates the set of Iterator types.
|
||||
|
|
@ -250,6 +253,24 @@ func (t Type) String() string {
|
|||
return types[t]
|
||||
}
|
||||
|
||||
type StatsContainer struct {
|
||||
IteratorStats
|
||||
Kind string
|
||||
Uid uint64
|
||||
SubIts []StatsContainer
|
||||
}
|
||||
|
||||
func DumpStats(it Iterator) StatsContainer {
|
||||
var out StatsContainer
|
||||
out.IteratorStats = it.Stats()
|
||||
out.Kind = it.Type().String()
|
||||
out.Uid = it.UID()
|
||||
for _, sub := range it.SubIterators() {
|
||||
out.SubIts = append(out.SubIts, DumpStats(sub))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Utility logging functions for when an iterator gets called Next upon, or Contains upon, as
|
||||
// well as what they return. Highly useful for tracing the execution path of a query.
|
||||
func ContainsLogIn(it Iterator, val Value) {
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ type Int64 struct {
|
|||
max, min int64
|
||||
at int64
|
||||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
}
|
||||
|
||||
// Creates a new Int64 with the given range.
|
||||
|
|
@ -89,6 +90,7 @@ func (it *Int64) DebugString(indent int) string {
|
|||
// Return the next integer, and mark it as the result.
|
||||
func (it *Int64) Next() bool {
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
if it.at == -1 {
|
||||
return graph.NextLogOut(it, nil, false)
|
||||
}
|
||||
|
|
@ -130,6 +132,7 @@ func (it *Int64) Size() (int64, bool) {
|
|||
// within the range, assuming the value is an int64.
|
||||
func (it *Int64) Contains(tsv graph.Value) bool {
|
||||
graph.ContainsLogIn(it, tsv)
|
||||
it.runstats.Contains += 1
|
||||
v := tsv.(int64)
|
||||
if it.min <= v && v <= it.max {
|
||||
it.result = v
|
||||
|
|
@ -153,5 +156,7 @@ func (it *Int64) Stats() graph.IteratorStats {
|
|||
ContainsCost: 1,
|
||||
NextCost: 1,
|
||||
Size: s,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ type And struct {
|
|||
primaryIt graph.Iterator
|
||||
checkList []graph.Iterator
|
||||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
}
|
||||
|
||||
// Creates a new And iterator.
|
||||
|
|
@ -158,6 +159,7 @@ func (it *And) AddSubIterator(sub graph.Iterator) {
|
|||
// is therefore very important.
|
||||
func (it *And) Next() bool {
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
for graph.Next(it.primaryIt) {
|
||||
curr := it.primaryIt.Result()
|
||||
if it.subItsContain(curr, nil) {
|
||||
|
|
@ -211,6 +213,7 @@ func (it *And) checkContainsList(val graph.Value, lastResult graph.Value) bool {
|
|||
// Check a value against the entire iterator, in order.
|
||||
func (it *And) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
it.runstats.Contains += 1
|
||||
lastResult := it.result
|
||||
if it.checkList != nil {
|
||||
return it.checkContainsList(val, lastResult)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ package iterator
|
|||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
)
|
||||
|
||||
|
|
@ -68,7 +70,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
|
|||
|
||||
// And now, without changing any of the iterators, we reorder them. it_list is
|
||||
// now a permutation of itself, but the contents are unchanged.
|
||||
its = optimizeOrder(its)
|
||||
its = it.optimizeOrder(its)
|
||||
|
||||
its = materializeIts(its)
|
||||
|
||||
|
|
@ -87,6 +89,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
|
|||
newAnd.tags.CopyFrom(it)
|
||||
|
||||
newAnd.optimizeContains()
|
||||
glog.V(3).Infoln(it.UID(), "became", newAnd.UID())
|
||||
|
||||
// And close ourselves but not our subiterators -- some may still be alive in
|
||||
// the new And (they were unchanged upon calling Optimize() on them, at the
|
||||
|
|
@ -133,7 +136,7 @@ func (_ *And) optimizeReplacement(its []graph.Iterator) graph.Iterator {
|
|||
|
||||
// optimizeOrder(l) takes a list and returns a list, containing the same contents
|
||||
// but with a new ordering, however it wishes.
|
||||
func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
||||
func (it *And) optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
||||
var (
|
||||
// bad contains iterators that can't be (efficiently) nexted, such as
|
||||
// graph.Optional or graph.Not. Separate them out and tack them on at the end.
|
||||
|
|
@ -146,29 +149,35 @@ func optimizeOrder(its []graph.Iterator) []graph.Iterator {
|
|||
// Total cost is defined as The Next()ed iterator's cost to Next() out
|
||||
// all of its contents, and to Contains() each of those against everyone
|
||||
// else.
|
||||
for _, it := range its {
|
||||
if _, canNext := it.(graph.Nexter); !canNext {
|
||||
bad = append(bad, it)
|
||||
for _, root := range its {
|
||||
if _, canNext := root.(graph.Nexter); !canNext {
|
||||
bad = append(bad, root)
|
||||
continue
|
||||
}
|
||||
rootStats := it.Stats()
|
||||
rootStats := root.Stats()
|
||||
cost := rootStats.NextCost
|
||||
for _, f := range its {
|
||||
if _, canNext := it.(graph.Nexter); !canNext {
|
||||
if _, canNext := f.(graph.Nexter); !canNext {
|
||||
continue
|
||||
}
|
||||
if f == it {
|
||||
if f == root {
|
||||
continue
|
||||
}
|
||||
stats := f.Stats()
|
||||
cost += stats.ContainsCost
|
||||
cost += stats.ContainsCost * (rootStats.Size / (stats.Size + 1))
|
||||
}
|
||||
cost *= rootStats.Size
|
||||
if glog.V(3) {
|
||||
glog.V(3).Infoln("And:", it.UID(), "Root:", root.UID(), "Total Cost:", cost, "Best:", bestCost)
|
||||
}
|
||||
if cost < bestCost {
|
||||
best = it
|
||||
best = root
|
||||
bestCost = cost
|
||||
}
|
||||
}
|
||||
if glog.V(3) {
|
||||
glog.V(3).Infoln("And:", it.UID(), "Choosing:", best.UID(), "Best:", bestCost)
|
||||
}
|
||||
|
||||
// TODO(barakmich): Optimization of order need not stop here. Picking a smart
|
||||
// Contains() order based on probability of getting a false Contains() first is
|
||||
|
|
@ -320,7 +329,7 @@ func (it *And) Stats() graph.IteratorStats {
|
|||
Size := primaryStats.Size
|
||||
for _, sub := range it.internalIterators {
|
||||
stats := sub.Stats()
|
||||
NextCost += stats.ContainsCost
|
||||
NextCost += stats.ContainsCost * (primaryStats.Size / (stats.Size + 1))
|
||||
ContainsCost += stats.ContainsCost
|
||||
if Size > stats.Size {
|
||||
Size = stats.Size
|
||||
|
|
@ -330,6 +339,8 @@ func (it *And) Stats() graph.IteratorStats {
|
|||
ContainsCost: ContainsCost * 2,
|
||||
NextCost: NextCost,
|
||||
Size: Size,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,9 +108,10 @@ func (it *Fixed) DebugString(indent int) string {
|
|||
if len(it.values) > 0 {
|
||||
value = fmt.Sprint(it.values[0])
|
||||
}
|
||||
return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)",
|
||||
return fmt.Sprintf("%s(%s %d tags: %s Size: %d id0: %d)",
|
||||
strings.Repeat(" ", indent),
|
||||
it.Type(),
|
||||
it.UID(),
|
||||
it.tags.Fixed(),
|
||||
len(it.values),
|
||||
value,
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ type HasA struct {
|
|||
dir quad.Direction
|
||||
resultIt graph.Iterator
|
||||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
}
|
||||
|
||||
// Construct a new HasA iterator, given the triple subiterator, and the triple
|
||||
|
|
@ -143,6 +144,7 @@ func (it *HasA) DebugString(indent int) string {
|
|||
// and then Next() values out of that iterator and Contains() them against our subiterator.
|
||||
func (it *HasA) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
it.runstats.Contains += 1
|
||||
if glog.V(4) {
|
||||
glog.V(4).Infoln("Id is", it.ts.NameOf(val))
|
||||
}
|
||||
|
|
@ -159,6 +161,7 @@ func (it *HasA) Contains(val graph.Value) bool {
|
|||
// another match is made.
|
||||
func (it *HasA) NextContains() bool {
|
||||
for graph.Next(it.resultIt) {
|
||||
it.runstats.ContainsNext += 1
|
||||
link := it.resultIt.Result()
|
||||
if glog.V(4) {
|
||||
glog.V(4).Infoln("Quad is", it.ts.Quad(link))
|
||||
|
|
@ -193,6 +196,7 @@ func (it *HasA) NextPath() bool {
|
|||
// pull our direction out of it, and return that.
|
||||
func (it *HasA) Next() bool {
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
if it.resultIt != nil {
|
||||
it.resultIt.Close()
|
||||
}
|
||||
|
|
@ -229,6 +233,9 @@ func (it *HasA) Stats() graph.IteratorStats {
|
|||
NextCost: tripleConstant + subitStats.NextCost,
|
||||
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
|
||||
Size: faninFactor * subitStats.Size,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
ContainsNext: it.runstats.ContainsNext,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -244,5 +251,5 @@ func (it *HasA) Close() {
|
|||
func (it *HasA) Type() graph.Type { return graph.HasA }
|
||||
|
||||
func (it *HasA) Size() (int64, bool) {
|
||||
return 0, true
|
||||
return it.Stats().Size, false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ import (
|
|||
|
||||
// nextIteratorID is the value the next call to NextUID will return.
//
// It is initialised to 1 in its declaration rather than from an init()
// function: init() runs only after all package-level variables have been
// initialised, so a store performed there would reset the counter and allow
// identifiers handed out during variable initialisation to be issued again.
var nextIteratorID uint64 = 1

// NextUID returns the current value of the identifier counter and advances
// it by one. The increment is performed with sync/atomic, and the first value
// returned is 1.
func NextUID() uint64 {
	// AddUint64 yields the post-increment value; subtract one to report the
	// identifier that was reserved by this call.
	return atomic.AddUint64(&nextIteratorID, 1) - 1
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ type LinksTo struct {
|
|||
dir quad.Direction
|
||||
nextIt graph.Iterator
|
||||
result graph.Value
|
||||
runstats graph.IteratorStats
|
||||
}
|
||||
|
||||
// Construct a new LinksTo iterator around a direction and a subiterator of
|
||||
|
|
@ -118,6 +119,7 @@ func (it *LinksTo) DebugString(indent int) string {
|
|||
// for the LinksTo.
|
||||
func (it *LinksTo) Contains(val graph.Value) bool {
|
||||
graph.ContainsLogIn(it, val)
|
||||
it.runstats.Contains += 1
|
||||
node := it.ts.TripleDirection(val, it.dir)
|
||||
if it.primaryIt.Contains(node) {
|
||||
it.result = val
|
||||
|
|
@ -155,7 +157,9 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) {
|
|||
// Next()ing a LinksTo operates as described above.
|
||||
func (it *LinksTo) Next() bool {
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
if graph.Next(it.nextIt) {
|
||||
it.runstats.ContainsNext += 1
|
||||
it.result = it.nextIt.Result()
|
||||
return graph.NextLogOut(it, it.nextIt, true)
|
||||
}
|
||||
|
|
@ -201,6 +205,9 @@ func (it *LinksTo) Stats() graph.IteratorStats {
|
|||
NextCost: nextConstant + subitStats.NextCost,
|
||||
ContainsCost: checkConstant + subitStats.ContainsCost,
|
||||
Size: fanoutFactor * subitStats.Size,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
ContainsNext: it.runstats.ContainsNext,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,11 +45,13 @@ type Materialize struct {
|
|||
tags graph.Tagger
|
||||
containsMap map[graph.Value]int
|
||||
values [][]result
|
||||
actualSize int64
|
||||
index int
|
||||
subindex int
|
||||
subIt graph.Iterator
|
||||
hasRun bool
|
||||
aborted bool
|
||||
runstats graph.IteratorStats
|
||||
}
|
||||
|
||||
func NewMaterialize(sub graph.Iterator) *Materialize {
|
||||
|
|
@ -111,6 +113,7 @@ func (it *Materialize) Clone() graph.Iterator {
|
|||
out.aborted = it.aborted
|
||||
out.values = it.values
|
||||
out.containsMap = it.containsMap
|
||||
out.actualSize = it.actualSize
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
|
@ -171,8 +174,10 @@ func (it *Materialize) Optimize() (graph.Iterator, bool) {
|
|||
// Otherwise, guess based on the size of the subiterator.
|
||||
func (it *Materialize) Size() (int64, bool) {
|
||||
if it.hasRun && !it.aborted {
|
||||
return int64(len(it.values)), true
|
||||
glog.V(2).Infoln("returning size", it.actualSize)
|
||||
return it.actualSize, true
|
||||
}
|
||||
glog.V(2).Infoln("bailing size", it.actualSize)
|
||||
return it.subIt.Size()
|
||||
}
|
||||
|
||||
|
|
@ -186,11 +191,14 @@ func (it *Materialize) Stats() graph.IteratorStats {
|
|||
ContainsCost: overhead * subitStats.NextCost,
|
||||
NextCost: overhead * subitStats.NextCost,
|
||||
Size: size,
|
||||
Next: it.runstats.Next,
|
||||
Contains: it.runstats.Contains,
|
||||
}
|
||||
}
|
||||
|
||||
func (it *Materialize) Next() bool {
|
||||
graph.NextLogIn(it)
|
||||
it.runstats.Next += 1
|
||||
if !it.hasRun {
|
||||
it.materializeSet()
|
||||
}
|
||||
|
|
@ -208,6 +216,7 @@ func (it *Materialize) Next() bool {
|
|||
|
||||
func (it *Materialize) Contains(v graph.Value) bool {
|
||||
graph.ContainsLogIn(it, v)
|
||||
it.runstats.Contains += 1
|
||||
if !it.hasRun {
|
||||
it.materializeSet()
|
||||
}
|
||||
|
|
@ -264,10 +273,17 @@ func (it *Materialize) materializeSet() {
|
|||
tags := make(map[string]graph.Value)
|
||||
it.subIt.TagResults(tags)
|
||||
it.values[index] = append(it.values[index], result{id: id, tags: tags})
|
||||
it.actualSize += 1
|
||||
for it.subIt.NextPath() {
|
||||
i++
|
||||
if i > abortMaterializeAt {
|
||||
it.aborted = true
|
||||
break
|
||||
}
|
||||
tags := make(map[string]graph.Value)
|
||||
it.subIt.TagResults(tags)
|
||||
it.values[index] = append(it.values[index], result{id: id, tags: tags})
|
||||
it.actualSize += 1
|
||||
}
|
||||
}
|
||||
if it.aborted {
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@
|
|||
package gremlin
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
"github.com/robertkrimen/otto"
|
||||
|
||||
|
|
@ -278,5 +280,9 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if glog.V(2) {
|
||||
bytes, _ := json.MarshalIndent(graph.DumpStats(it), "", " ")
|
||||
glog.V(2).Infoln(string(bytes))
|
||||
}
|
||||
it.Close()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue