diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index db841dd..cec5960 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -103,6 +103,7 @@ func (it *And) Optimize() (graph.Iterator, bool) { newReplacement, hasOne := it.qs.OptimizeIterator(newAnd) if hasOne { newAnd.Close() + glog.V(3).Infoln(it.UID(), "became", newReplacement.UID(), "from quadstore") return newReplacement, true } } @@ -330,7 +331,7 @@ func materializeIts(its []graph.Iterator) []graph.Iterator { out = append(out, its[0]) for _, it := range its[1:] { stats := it.Stats() - if stats.Size*stats.NextCost < (stats.ContainsCost * (1 + (stats.Size / (allStats.Size + 1)))) { + if false && stats.Size*stats.NextCost < (stats.ContainsCost*(1+(stats.Size/(allStats.Size+1)))) { if graph.Height(it, graph.Materialize) > 10 { out = append(out, NewMaterialize(it)) continue diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index 9547c54..32be43b 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -105,6 +105,14 @@ func (it *HasA) Optimize() (graph.Iterator, bool) { return it.primaryIt, true } } + // Ask the graph.QuadStore if we can be replaced. Often times, this is a great + // optimization opportunity (there's a fixed iterator underneath us, for + // example). + newReplacement, hasOne := it.qs.OptimizeIterator(it) + if hasOne { + it.Close() + return newReplacement, true + } return it, false } diff --git a/graph/iterator/not_iterator.go b/graph/iterator/not_iterator.go index 4d393e8..6813a5d 100644 --- a/graph/iterator/not_iterator.go +++ b/graph/iterator/not_iterator.go @@ -140,6 +140,7 @@ func (it *Not) Optimize() (graph.Iterator, bool) { if optimized { it.primaryIt = optimizedPrimaryIt } + it.primaryIt = NewMaterialize(it.primaryIt) return it, false } diff --git a/graph/sql/builder_iterator.go b/graph/sql/builder_iterator.go index f470845..867789d 100644 --- a/graph/sql/builder_iterator.go +++ b/graph/sql/builder_iterator.go @@ -46,6 +46,7 @@ func (td tableDir) String() string { type clause interface { toSQL() (string, []string) getTables() map[string]bool + size() int } type baseClause struct { @@ -65,6 +66,8 @@ func (b baseClause) toSQL() (string, []string) { return fmt.Sprintf("%s = ?", b.pair), []string{b.strTarget[0]} } +func (b baseClause) size() int { return 1 } + func (b baseClause) getTables() map[string]bool { out := make(map[string]bool) if b.pair.table != "" { @@ -82,7 +85,27 @@ type joinClause struct { op clauseOp } +func (jc joinClause) size() int { + size := 0 + if jc.left != nil { + size += jc.left.size() + } + if jc.right != nil { + size += jc.right.size() + } + return size +} + func (jc joinClause) toSQL() (string, []string) { + if jc.left == nil { + if jc.right == nil { + return "", []string{} + } + return jc.right.toSQL() + } + if jc.right == nil { + return jc.left.toSQL() + } l, lstr := jc.left.toSQL() r, rstr := jc.right.toSQL() lstr = append(lstr, rstr...) @@ -93,13 +116,20 @@ func (jc joinClause) toSQL() (string, []string) { case orClause: op = "OR" } - return fmt.Sprint("(%s %s %s)", l, op, r), lstr + return fmt.Sprintf("(%s %s %s)", l, op, r), lstr } func (jc joinClause) getTables() map[string]bool { - m := jc.left.getTables() - for k, _ := range jc.right.getTables() { - m[k] = true + var m map[string]bool + if jc.left != nil { + m = jc.left.getTables() + } else { + m = make(map[string]bool) + } + if jc.right != nil { + for k, _ := range jc.right.getTables() { + m[k] = true + } } return m } @@ -166,8 +196,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ t = []string{fmt.Sprintf("%s.%s as __execd", it.tableName(), it.dir)} } for _, v := range it.tags { + if v.pair.table == "" { + v.pair.table = it.tableName() + } t = append(t, fmt.Sprintf("%s as %s", v.pair, v.t)) } + for _, v := range it.tagger.Tags() { + t = append(t, fmt.Sprintf("%s as %s", tableDir{it.tableName(), it.dir}, v)) + } str += strings.Join(t, ", ") str += " FROM " t = []string{fmt.Sprintf("quads as %s", it.tableName())} @@ -180,7 +216,7 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ str += " WHERE " var values []string var s string - if it.stType != node { + if len(it.buildWhere) != 0 { s, values = it.canonicalizeWhere() } if it.where != nil { @@ -191,28 +227,31 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ s += where values = append(values, v2...) } - str += s + if contains { + if s != "" { + s += " AND " + } if it.stType == link { q := v.(quad.Quad) - str += " AND " t = []string{ fmt.Sprintf("%s.subject = ?", it.tableName()), fmt.Sprintf("%s.predicate = ?", it.tableName()), fmt.Sprintf("%s.object = ?", it.tableName()), fmt.Sprintf("%s.label = ?", it.tableName()), } - str += " " + strings.Join(t, " AND ") + " " + s += " " + strings.Join(t, " AND ") + " " values = append(values, q.Subject) values = append(values, q.Predicate) values = append(values, q.Object) values = append(values, q.Label) } else { - str += fmt.Sprintf(" AND %s.%s = ? ", it.tableName(), it.dir) + s += fmt.Sprintf("%s.%s = ? ", it.tableName(), it.dir) values = append(values, v.(string)) } } + str += s if it.stType == node { str += " ORDER BY __execd " } @@ -220,6 +259,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ for i := 1; i <= len(values); i++ { str = strings.Replace(str, "?", fmt.Sprintf("$%d", i), 1) } + glog.V(2).Infoln(str) + if glog.V(4) { + dstr := str + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, fmt.Sprintf("$%d", i), fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } return str, values } @@ -254,6 +301,7 @@ func (it *StatementIterator) Clone() graph.Iterator { where: it.where, stType: it.stType, size: it.size, + dir: it.dir, } copy(it.tags, m.tags) m.tagger.CopyFrom(it) @@ -364,6 +412,12 @@ func (it *StatementIterator) Contains(v graph.Value) bool { ivalues = append(ivalues, v) } it.cursor, err = it.qs.db.Query(q, ivalues...) + if err != nil { + glog.Errorf("Couldn't make query: %v", err) + it.err = err + it.cursor.Close() + return false + } it.cols, err = it.cursor.Columns() if err != nil { glog.Errorf("Couldn't get columns") @@ -414,10 +468,17 @@ func (it *StatementIterator) Size() (int64, bool) { return it.size, true } if it.stType == node { - return it.qs.Size(), true + if it.where == nil { + return it.qs.Size() / int64(len(it.buildWhere)+1), true + } + return it.qs.Size() / int64(it.where.size()+len(it.buildWhere)+1), true } b := it.buildWhere[0] - it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0]) + if len(b.strTarget) > 0 { + it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0]) + } else { + return it.qs.Size(), false + } return it.size, true } @@ -425,7 +486,7 @@ func (it *StatementIterator) Describe() graph.Description { size, _ := it.Size() return graph.Description{ UID: it.UID(), - Name: "SQL_QUERY", + Name: fmt.Sprintf("SQL_QUERY: %#v", it), Type: it.Type(), Size: size, } @@ -451,7 +512,7 @@ func (it *StatementIterator) makeCursor() { } cursor, err := it.qs.db.Query(q, ivalues...) if err != nil { - glog.Errorln("Couldn't get cursor from SQL database: %v", err) + glog.Errorf("Couldn't get cursor from SQL database: %v", err) cursor = nil } it.cursor = cursor @@ -542,7 +603,7 @@ func (it *StatementIterator) Next() bool { return graph.NextLogOut(it, nil, false) } it.buildResult(0) - return graph.NextLogOut(it, it.result, true) + return graph.NextLogOut(it, it.Result(), true) } func (it *StatementIterator) scan() ([]string, error) { diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go new file mode 100644 index 0000000..c07741b --- /dev/null +++ b/graph/sql/optimizers.go @@ -0,0 +1,287 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "errors" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +func intersect(a *StatementIterator, b *StatementIterator) (*StatementIterator, error) { + if a.stType != b.stType { + return nil, errors.New("Cannot combine SQL iterators of two different types") + } + min := a.size + if b.size < a.size { + min = b.size + } + var where clause + if a.where == nil { + if b.where == nil { + where = nil + } + where = b.where + } else { + if b.where == nil { + where = a.where + } + where = joinClause{a.where, b.where, andClause} + } + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + buildWhere: append(a.buildWhere, b.buildWhere...), + tags: append(a.tags, b.tags...), + where: where, + stType: a.stType, + size: min, + dir: a.dir, + } + out.tagger.CopyFrom(a) + out.tagger.CopyFrom(b) + if out.stType == node { + out.buildWhere = append(out.buildWhere, baseClause{ + pair: tableDir{"", a.dir}, + target: tableDir{b.tableName(), b.dir}, + }) + } + return out, nil +} + +func hasa(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { + if a.stType != link { + return nil, errors.New("Can't take the HASA of a link SQL iterator") + } + + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + stType: node, + dir: d, + } + where := a.where + for _, w := range a.buildWhere { + w.pair.table = out.tableName() + wherenew := joinClause{where, w, andClause} + where = wherenew + } + out.where = where + //out := &StatementIterator{ + //uid: iterator.NextUID(), + //qs: a.qs, + //stType: node, + //dir: d, + //buildWhere: a.buildWhere, + //where: a.where, + //size: -1, + //} + for k, v := range a.tagger.Fixed() { + out.tagger.AddFixed(k, v) + } + var tags []tag + for _, t := range a.tagger.Tags() { + tags = append(tags, tag{ + pair: tableDir{ + table: out.tableName(), + dir: quad.Any, + }, + t: t, + }) + } + out.tags = append(tags, a.tags...) + return out, nil +} + +func linksto(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { + if a.stType != node { + return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + } + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + stType: link, + dir: d, + size: -1, + } + where := a.where + for _, w := range a.buildWhere { + w.pair.table = a.tableName() + wherenew := joinClause{where, w, andClause} + where = wherenew + } + + out.where = where + out.buildWhere = []baseClause{ + baseClause{ + pair: tableDir{ + dir: d, + }, + target: tableDir{ + table: a.tableName(), + dir: a.dir, + }, + }, + } + var tags []tag + for _, t := range a.tagger.Tags() { + tags = append(tags, tag{ + pair: tableDir{ + table: a.tableName(), + dir: a.dir, + }, + t: t, + }) + } + for k, v := range a.tagger.Fixed() { + out.tagger.AddFixed(k, v) + } + for _, t := range a.tags { + if t.pair.table == "" { + t.pair.table = a.tableName() + } + tags = append(tags, t) + } + out.tags = tags + return out, nil +} + +func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + switch it.Type() { + case graph.LinksTo: + return qs.optimizeLinksTo(it.(*iterator.LinksTo)) + case graph.HasA: + return qs.optimizeHasA(it.(*iterator.HasA)) + case graph.And: + return qs.optimizeAnd(it.(*iterator.And)) + } + return it, false +} + +func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { + subs := it.SubIterators() + if len(subs) != 1 { + return it, false + } + primary := subs[0] + switch primary.Type() { + case graph.Fixed: + size, _ := primary.Size() + if size == 1 { + if !graph.Next(primary) { + panic("unexpected size during optimize") + } + val := primary.Result() + newIt := qs.QuadIterator(it.Direction(), val) + nt := newIt.Tagger() + nt.CopyFrom(it) + for _, tag := range primary.Tagger().Tags() { + nt.AddFixed(tag, val) + } + it.Close() + return newIt, true + } + case sqlBuilderType: + newit, err := linksto(primary.(*StatementIterator), it.Direction()) + if err != nil { + glog.Errorln(err) + return it, false + } + newit.Tagger().CopyFrom(it) + return newit, true + case graph.All: + newit := &StatementIterator{ + uid: iterator.NextUID(), + qs: qs, + stType: link, + size: qs.Size(), + } + for _, t := range primary.Tagger().Tags() { + newit.tags = append(newit.tags, tag{ + pair: tableDir{"", it.Direction()}, + t: t, + }) + } + for k, v := range primary.Tagger().Fixed() { + newit.tagger.AddFixed(k, v) + } + newit.tagger.CopyFrom(it) + + return newit, true + } + return it, false +} + +func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) { + subs := it.SubIterators() + var unusedIts []graph.Iterator + var newit *StatementIterator + newit = nil + changed := false + var err error + + for _, it := range subs { + if it.Type() == sqlBuilderType { + if newit == nil { + newit = it.(*StatementIterator) + } else { + changed = true + newit, err = intersect(newit, it.(*StatementIterator)) + if err != nil { + glog.Error(err) + return it, false + } + } + } else { + unusedIts = append(unusedIts, it) + } + } + + if !changed { + return it, false + } + if len(unusedIts) == 0 { + newit.tagger.CopyFrom(it) + return newit, true + } + newAnd := iterator.NewAnd(qs) + newAnd.Tagger().CopyFrom(it) + newAnd.AddSubIterator(newit) + for _, i := range unusedIts { + newAnd.AddSubIterator(i) + } + return newAnd.Optimize() +} + +func (qs *QuadStore) optimizeHasA(it *iterator.HasA) (graph.Iterator, bool) { + subs := it.SubIterators() + if len(subs) != 1 { + return it, false + } + primary := subs[0] + if primary.Type() == sqlBuilderType { + newit, err := hasa(primary.(*StatementIterator), it.Direction()) + if err != nil { + glog.Errorln(err) + return it, false + } + newit.Tagger().CopyFrom(it) + return newit, true + } + return it, false +} diff --git a/graph/sql/optimizers_test.go b/graph/sql/optimizers_test.go new file mode 100644 index 0000000..916fa25 --- /dev/null +++ b/graph/sql/optimizers_test.go @@ -0,0 +1,128 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "fmt" + "testing" + + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +func TestBuildIntersect(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it, err := intersect(a, b) + if err != nil { + t.Error(err) + } + s, v := it.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestBuildHasa(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + a.tagger.Add("foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Object) + if err != nil { + t.Error(err) + } + s, v := it2.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestBuildLinksTo(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Object) + it2.tagger.Add("foo") + if err != nil { + t.Error(err) + } + it3, err := linksto(it2, quad.Subject) + if err != nil { + t.Error(err) + } + s, v := it3.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestInterestingQuery(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + a := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + b := NewStatementIterator(db.(*QuadStore), quad.Predicate, "name") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Subject) + if err != nil { + t.Error(err) + } + it2.Tagger().Add("hb") + it3, err := linksto(it2, quad.Object) + if err != nil { + t.Error(err) + } + b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor") + it4, err := intersect(it3, b) + if err != nil { + t.Error(err) + } + it5, err := hasa(it4, quad.Subject) + if err != nil { + t.Error(err) + } + it6, err := linksto(it5, quad.Object) + if err != nil { + t.Error(err) + } + b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring") + it7, err := intersect(it6, b) + if err != nil { + t.Error(err) + } + it8, err := hasa(it7, quad.Subject) + if err != nil { + t.Error(err) + } + s, v := it8.buildQuery(false, nil) + it8.Tagger().Add("id") + fmt.Println(s, v) + for graph.Next(it8) { + fmt.Println(it8.Result()) + out := make(map[string]graph.Value) + it8.TagResults(out) + for k, v := range out { + fmt.Printf("%s: %v\n", k, v.(string)) + } + } +} diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 26d1ff8..0cad60d 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -260,41 +260,6 @@ func (qs *QuadStore) Type() string { return QuadStoreType } -func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { - switch it.Type() { - case graph.LinksTo: - return qs.optimizeLinksTo(it.(*iterator.LinksTo)) - - } - return it, false -} - -func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { - subs := it.SubIterators() - if len(subs) != 1 { - return it, false - } - primary := subs[0] - if primary.Type() == graph.Fixed { - size, _ := primary.Size() - if size == 1 { - if !graph.Next(primary) { - panic("unexpected size during optimize") - } - val := primary.Result() - newIt := qs.QuadIterator(it.Direction(), val) - nt := newIt.Tagger() - nt.CopyFrom(it) - for _, tag := range primary.Tagger().Tags() { - nt.AddFixed(tag, val) - } - it.Close() - return newIt, true - } - } - return it, false -} - func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) int64 { var err error if isAll {