Optimize by collapsing trees into single SQL queries

This commit is contained in:
Barak Michener 2015-07-22 19:11:59 -04:00
parent 185e236f15
commit 621acae945
7 changed files with 501 additions and 50 deletions

View file

@ -103,6 +103,7 @@ func (it *And) Optimize() (graph.Iterator, bool) {
newReplacement, hasOne := it.qs.OptimizeIterator(newAnd)
if hasOne {
newAnd.Close()
glog.V(3).Infoln(it.UID(), "became", newReplacement.UID(), "from quadstore")
return newReplacement, true
}
}
@ -330,7 +331,7 @@ func materializeIts(its []graph.Iterator) []graph.Iterator {
out = append(out, its[0])
for _, it := range its[1:] {
stats := it.Stats()
if stats.Size*stats.NextCost < (stats.ContainsCost * (1 + (stats.Size / (allStats.Size + 1)))) {
if false && stats.Size*stats.NextCost < (stats.ContainsCost*(1+(stats.Size/(allStats.Size+1)))) {
if graph.Height(it, graph.Materialize) > 10 {
out = append(out, NewMaterialize(it))
continue

View file

@ -105,6 +105,14 @@ func (it *HasA) Optimize() (graph.Iterator, bool) {
return it.primaryIt, true
}
}
// Ask the graph.QuadStore if we can be replaced. Often times, this is a great
// optimization opportunity (there's a fixed iterator underneath us, for
// example).
newReplacement, hasOne := it.qs.OptimizeIterator(it)
if hasOne {
it.Close()
return newReplacement, true
}
return it, false
}

View file

@ -140,6 +140,7 @@ func (it *Not) Optimize() (graph.Iterator, bool) {
if optimized {
it.primaryIt = optimizedPrimaryIt
}
it.primaryIt = NewMaterialize(it.primaryIt)
return it, false
}

View file

@ -46,6 +46,7 @@ func (td tableDir) String() string {
type clause interface {
toSQL() (string, []string)
getTables() map[string]bool
size() int
}
type baseClause struct {
@ -65,6 +66,8 @@ func (b baseClause) toSQL() (string, []string) {
return fmt.Sprintf("%s = ?", b.pair), []string{b.strTarget[0]}
}
func (b baseClause) size() int { return 1 }
func (b baseClause) getTables() map[string]bool {
out := make(map[string]bool)
if b.pair.table != "" {
@ -82,7 +85,27 @@ type joinClause struct {
op clauseOp
}
func (jc joinClause) size() int {
size := 0
if jc.left != nil {
size += jc.left.size()
}
if jc.right != nil {
size += jc.right.size()
}
return size
}
func (jc joinClause) toSQL() (string, []string) {
if jc.left == nil {
if jc.right == nil {
return "", []string{}
}
return jc.right.toSQL()
}
if jc.right == nil {
return jc.left.toSQL()
}
l, lstr := jc.left.toSQL()
r, rstr := jc.right.toSQL()
lstr = append(lstr, rstr...)
@ -93,14 +116,21 @@ func (jc joinClause) toSQL() (string, []string) {
case orClause:
op = "OR"
}
return fmt.Sprint("(%s %s %s)", l, op, r), lstr
return fmt.Sprintf("(%s %s %s)", l, op, r), lstr
}
func (jc joinClause) getTables() map[string]bool {
m := jc.left.getTables()
var m map[string]bool
if jc.left != nil {
m = jc.left.getTables()
} else {
m = make(map[string]bool)
}
if jc.right != nil {
for k, _ := range jc.right.getTables() {
m[k] = true
}
}
return m
}
@ -166,8 +196,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [
t = []string{fmt.Sprintf("%s.%s as __execd", it.tableName(), it.dir)}
}
for _, v := range it.tags {
if v.pair.table == "" {
v.pair.table = it.tableName()
}
t = append(t, fmt.Sprintf("%s as %s", v.pair, v.t))
}
for _, v := range it.tagger.Tags() {
t = append(t, fmt.Sprintf("%s as %s", tableDir{it.tableName(), it.dir}, v))
}
str += strings.Join(t, ", ")
str += " FROM "
t = []string{fmt.Sprintf("quads as %s", it.tableName())}
@ -180,7 +216,7 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [
str += " WHERE "
var values []string
var s string
if it.stType != node {
if len(it.buildWhere) != 0 {
s, values = it.canonicalizeWhere()
}
if it.where != nil {
@ -191,28 +227,31 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [
s += where
values = append(values, v2...)
}
str += s
if contains {
if s != "" {
s += " AND "
}
if it.stType == link {
q := v.(quad.Quad)
str += " AND "
t = []string{
fmt.Sprintf("%s.subject = ?", it.tableName()),
fmt.Sprintf("%s.predicate = ?", it.tableName()),
fmt.Sprintf("%s.object = ?", it.tableName()),
fmt.Sprintf("%s.label = ?", it.tableName()),
}
str += " " + strings.Join(t, " AND ") + " "
s += " " + strings.Join(t, " AND ") + " "
values = append(values, q.Subject)
values = append(values, q.Predicate)
values = append(values, q.Object)
values = append(values, q.Label)
} else {
str += fmt.Sprintf(" AND %s.%s = ? ", it.tableName(), it.dir)
s += fmt.Sprintf("%s.%s = ? ", it.tableName(), it.dir)
values = append(values, v.(string))
}
}
str += s
if it.stType == node {
str += " ORDER BY __execd "
}
@ -220,6 +259,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [
for i := 1; i <= len(values); i++ {
str = strings.Replace(str, "?", fmt.Sprintf("$%d", i), 1)
}
glog.V(2).Infoln(str)
if glog.V(4) {
dstr := str
for i := 1; i <= len(values); i++ {
dstr = strings.Replace(dstr, fmt.Sprintf("$%d", i), fmt.Sprintf("'%s'", values[i-1]), 1)
}
glog.V(4).Infoln(dstr)
}
return str, values
}
@ -254,6 +301,7 @@ func (it *StatementIterator) Clone() graph.Iterator {
where: it.where,
stType: it.stType,
size: it.size,
dir: it.dir,
}
copy(it.tags, m.tags)
m.tagger.CopyFrom(it)
@ -364,6 +412,12 @@ func (it *StatementIterator) Contains(v graph.Value) bool {
ivalues = append(ivalues, v)
}
it.cursor, err = it.qs.db.Query(q, ivalues...)
if err != nil {
glog.Errorf("Couldn't make query: %v", err)
it.err = err
it.cursor.Close()
return false
}
it.cols, err = it.cursor.Columns()
if err != nil {
glog.Errorf("Couldn't get columns")
@ -414,10 +468,17 @@ func (it *StatementIterator) Size() (int64, bool) {
return it.size, true
}
if it.stType == node {
return it.qs.Size(), true
if it.where == nil {
return it.qs.Size() / int64(len(it.buildWhere)+1), true
}
return it.qs.Size() / int64(it.where.size()+len(it.buildWhere)+1), true
}
b := it.buildWhere[0]
if len(b.strTarget) > 0 {
it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0])
} else {
return it.qs.Size(), false
}
return it.size, true
}
@ -425,7 +486,7 @@ func (it *StatementIterator) Describe() graph.Description {
size, _ := it.Size()
return graph.Description{
UID: it.UID(),
Name: "SQL_QUERY",
Name: fmt.Sprintf("SQL_QUERY: %#v", it),
Type: it.Type(),
Size: size,
}
@ -451,7 +512,7 @@ func (it *StatementIterator) makeCursor() {
}
cursor, err := it.qs.db.Query(q, ivalues...)
if err != nil {
glog.Errorln("Couldn't get cursor from SQL database: %v", err)
glog.Errorf("Couldn't get cursor from SQL database: %v", err)
cursor = nil
}
it.cursor = cursor
@ -542,7 +603,7 @@ func (it *StatementIterator) Next() bool {
return graph.NextLogOut(it, nil, false)
}
it.buildResult(0)
return graph.NextLogOut(it, it.result, true)
return graph.NextLogOut(it, it.Result(), true)
}
func (it *StatementIterator) scan() ([]string, error) {

287
graph/sql/optimizers.go Normal file
View file

@ -0,0 +1,287 @@
// Copyright 2015 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sql
import (
"errors"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
)
func intersect(a *StatementIterator, b *StatementIterator) (*StatementIterator, error) {
if a.stType != b.stType {
return nil, errors.New("Cannot combine SQL iterators of two different types")
}
min := a.size
if b.size < a.size {
min = b.size
}
var where clause
if a.where == nil {
if b.where == nil {
where = nil
}
where = b.where
} else {
if b.where == nil {
where = a.where
}
where = joinClause{a.where, b.where, andClause}
}
out := &StatementIterator{
uid: iterator.NextUID(),
qs: a.qs,
buildWhere: append(a.buildWhere, b.buildWhere...),
tags: append(a.tags, b.tags...),
where: where,
stType: a.stType,
size: min,
dir: a.dir,
}
out.tagger.CopyFrom(a)
out.tagger.CopyFrom(b)
if out.stType == node {
out.buildWhere = append(out.buildWhere, baseClause{
pair: tableDir{"", a.dir},
target: tableDir{b.tableName(), b.dir},
})
}
return out, nil
}
func hasa(a *StatementIterator, d quad.Direction) (*StatementIterator, error) {
if a.stType != link {
return nil, errors.New("Can't take the HASA of a link SQL iterator")
}
out := &StatementIterator{
uid: iterator.NextUID(),
qs: a.qs,
stType: node,
dir: d,
}
where := a.where
for _, w := range a.buildWhere {
w.pair.table = out.tableName()
wherenew := joinClause{where, w, andClause}
where = wherenew
}
out.where = where
//out := &StatementIterator{
//uid: iterator.NextUID(),
//qs: a.qs,
//stType: node,
//dir: d,
//buildWhere: a.buildWhere,
//where: a.where,
//size: -1,
//}
for k, v := range a.tagger.Fixed() {
out.tagger.AddFixed(k, v)
}
var tags []tag
for _, t := range a.tagger.Tags() {
tags = append(tags, tag{
pair: tableDir{
table: out.tableName(),
dir: quad.Any,
},
t: t,
})
}
out.tags = append(tags, a.tags...)
return out, nil
}
func linksto(a *StatementIterator, d quad.Direction) (*StatementIterator, error) {
if a.stType != node {
return nil, errors.New("Can't take the LINKSTO of a node SQL iterator")
}
out := &StatementIterator{
uid: iterator.NextUID(),
qs: a.qs,
stType: link,
dir: d,
size: -1,
}
where := a.where
for _, w := range a.buildWhere {
w.pair.table = a.tableName()
wherenew := joinClause{where, w, andClause}
where = wherenew
}
out.where = where
out.buildWhere = []baseClause{
baseClause{
pair: tableDir{
dir: d,
},
target: tableDir{
table: a.tableName(),
dir: a.dir,
},
},
}
var tags []tag
for _, t := range a.tagger.Tags() {
tags = append(tags, tag{
pair: tableDir{
table: a.tableName(),
dir: a.dir,
},
t: t,
})
}
for k, v := range a.tagger.Fixed() {
out.tagger.AddFixed(k, v)
}
for _, t := range a.tags {
if t.pair.table == "" {
t.pair.table = a.tableName()
}
tags = append(tags, t)
}
out.tags = tags
return out, nil
}
func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
switch it.Type() {
case graph.LinksTo:
return qs.optimizeLinksTo(it.(*iterator.LinksTo))
case graph.HasA:
return qs.optimizeHasA(it.(*iterator.HasA))
case graph.And:
return qs.optimizeAnd(it.(*iterator.And))
}
return it, false
}
func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) {
subs := it.SubIterators()
if len(subs) != 1 {
return it, false
}
primary := subs[0]
switch primary.Type() {
case graph.Fixed:
size, _ := primary.Size()
if size == 1 {
if !graph.Next(primary) {
panic("unexpected size during optimize")
}
val := primary.Result()
newIt := qs.QuadIterator(it.Direction(), val)
nt := newIt.Tagger()
nt.CopyFrom(it)
for _, tag := range primary.Tagger().Tags() {
nt.AddFixed(tag, val)
}
it.Close()
return newIt, true
}
case sqlBuilderType:
newit, err := linksto(primary.(*StatementIterator), it.Direction())
if err != nil {
glog.Errorln(err)
return it, false
}
newit.Tagger().CopyFrom(it)
return newit, true
case graph.All:
newit := &StatementIterator{
uid: iterator.NextUID(),
qs: qs,
stType: link,
size: qs.Size(),
}
for _, t := range primary.Tagger().Tags() {
newit.tags = append(newit.tags, tag{
pair: tableDir{"", it.Direction()},
t: t,
})
}
for k, v := range primary.Tagger().Fixed() {
newit.tagger.AddFixed(k, v)
}
newit.tagger.CopyFrom(it)
return newit, true
}
return it, false
}
func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) {
subs := it.SubIterators()
var unusedIts []graph.Iterator
var newit *StatementIterator
newit = nil
changed := false
var err error
for _, it := range subs {
if it.Type() == sqlBuilderType {
if newit == nil {
newit = it.(*StatementIterator)
} else {
changed = true
newit, err = intersect(newit, it.(*StatementIterator))
if err != nil {
glog.Error(err)
return it, false
}
}
} else {
unusedIts = append(unusedIts, it)
}
}
if !changed {
return it, false
}
if len(unusedIts) == 0 {
newit.tagger.CopyFrom(it)
return newit, true
}
newAnd := iterator.NewAnd(qs)
newAnd.Tagger().CopyFrom(it)
newAnd.AddSubIterator(newit)
for _, i := range unusedIts {
newAnd.AddSubIterator(i)
}
return newAnd.Optimize()
}
func (qs *QuadStore) optimizeHasA(it *iterator.HasA) (graph.Iterator, bool) {
subs := it.SubIterators()
if len(subs) != 1 {
return it, false
}
primary := subs[0]
if primary.Type() == sqlBuilderType {
newit, err := hasa(primary.(*StatementIterator), it.Direction())
if err != nil {
glog.Errorln(err)
return it, false
}
newit.Tagger().CopyFrom(it)
return newit, true
}
return it, false
}

View file

@ -0,0 +1,128 @@
// Copyright 2015 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sql
import (
"fmt"
"testing"
"github.com/google/cayley/graph"
"github.com/google/cayley/quad"
)
func TestBuildIntersect(t *testing.T) {
a := NewStatementIterator(nil, quad.Subject, "Foo")
b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to")
it, err := intersect(a, b)
if err != nil {
t.Error(err)
}
s, v := it.buildQuery(false, nil)
fmt.Println(s, v)
}
func TestBuildHasa(t *testing.T) {
a := NewStatementIterator(nil, quad.Subject, "Foo")
a.tagger.Add("foo")
b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to")
it1, err := intersect(a, b)
if err != nil {
t.Error(err)
}
it2, err := hasa(it1, quad.Object)
if err != nil {
t.Error(err)
}
s, v := it2.buildQuery(false, nil)
fmt.Println(s, v)
}
func TestBuildLinksTo(t *testing.T) {
a := NewStatementIterator(nil, quad.Subject, "Foo")
b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to")
it1, err := intersect(a, b)
if err != nil {
t.Error(err)
}
it2, err := hasa(it1, quad.Object)
it2.tagger.Add("foo")
if err != nil {
t.Error(err)
}
it3, err := linksto(it2, quad.Subject)
if err != nil {
t.Error(err)
}
s, v := it3.buildQuery(false, nil)
fmt.Println(s, v)
}
func TestInterestingQuery(t *testing.T) {
if *dbpath == "" {
t.SkipNow()
}
db, err := newQuadStore(*dbpath, nil)
if err != nil {
t.Fatal(err)
}
a := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart")
b := NewStatementIterator(db.(*QuadStore), quad.Predicate, "name")
it1, err := intersect(a, b)
if err != nil {
t.Error(err)
}
it2, err := hasa(it1, quad.Subject)
if err != nil {
t.Error(err)
}
it2.Tagger().Add("hb")
it3, err := linksto(it2, quad.Object)
if err != nil {
t.Error(err)
}
b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor")
it4, err := intersect(it3, b)
if err != nil {
t.Error(err)
}
it5, err := hasa(it4, quad.Subject)
if err != nil {
t.Error(err)
}
it6, err := linksto(it5, quad.Object)
if err != nil {
t.Error(err)
}
b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring")
it7, err := intersect(it6, b)
if err != nil {
t.Error(err)
}
it8, err := hasa(it7, quad.Subject)
if err != nil {
t.Error(err)
}
s, v := it8.buildQuery(false, nil)
it8.Tagger().Add("id")
fmt.Println(s, v)
for graph.Next(it8) {
fmt.Println(it8.Result())
out := make(map[string]graph.Value)
it8.TagResults(out)
for k, v := range out {
fmt.Printf("%s: %v\n", k, v.(string))
}
}
}

View file

@ -260,41 +260,6 @@ func (qs *QuadStore) Type() string {
return QuadStoreType
}
func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {
switch it.Type() {
case graph.LinksTo:
return qs.optimizeLinksTo(it.(*iterator.LinksTo))
}
return it, false
}
func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) {
subs := it.SubIterators()
if len(subs) != 1 {
return it, false
}
primary := subs[0]
if primary.Type() == graph.Fixed {
size, _ := primary.Size()
if size == 1 {
if !graph.Next(primary) {
panic("unexpected size during optimize")
}
val := primary.Result()
newIt := qs.QuadIterator(it.Direction(), val)
nt := newIt.Tagger()
nt.CopyFrom(it)
for _, tag := range primary.Tagger().Tags() {
nt.AddFixed(tag, val)
}
it.Close()
return newIt, true
}
}
return it, false
}
func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) int64 {
var err error
if isAll {