From 3f391a782c7cda1b4360c1577133d7bde9481349 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Thu, 9 Apr 2015 17:28:41 -0400 Subject: [PATCH 01/18] first working-ish Postgres backend Subcommits: implement iterator and remove ResultTree add Err() to sql remove redundant and less helpful indices, change fillfactor, and use COPY FROM --- cmd/cayley/cayley.go | 11 ++ graph/sql/iterator.go | 270 ++++++++++++++++++++++++++++++++++++++++++++ graph/sql/lru.go | 63 +++++++++++ graph/sql/quadstore.go | 295 ++++++++++++++++++++++++++++++++++++++++++++++++ quad/quad.go | 2 + query/gremlin/finals.go | 4 +- 6 files changed, 643 insertions(+), 2 deletions(-) create mode 100644 graph/sql/iterator.go create mode 100644 graph/sql/lru.go create mode 100644 graph/sql/quadstore.go diff --git a/cmd/cayley/cayley.go b/cmd/cayley/cayley.go index 302d9e7..7f76559 100644 --- a/cmd/cayley/cayley.go +++ b/cmd/cayley/cayley.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "runtime" + "runtime/pprof" "time" "github.com/barakmich/glog" @@ -36,6 +37,7 @@ import ( _ "github.com/google/cayley/graph/leveldb" _ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/mongo" + _ "github.com/google/cayley/graph/sql" // Load writer registry _ "github.com/google/cayley/writer" @@ -147,6 +149,15 @@ func main() { os.Args = append(os.Args[:1], os.Args[2:]...) flag.Parse() + if *cpuprofile != "" { + f, err := os.Create(*cpuprofile) + if err != nil { + glog.Fatal(err) + } + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + var buildString string if Version != "" { buildString = fmt.Sprint("Cayley ", Version, " built ", BuildDate) diff --git a/graph/sql/iterator.go b/graph/sql/iterator.go new file mode 100644 index 0000000..af8a8a7 --- /dev/null +++ b/graph/sql/iterator.go @@ -0,0 +1,270 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "database/sql" + "fmt" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +type Iterator struct { + uid uint64 + tags graph.Tagger + qs *QuadStore + dir quad.Direction + val graph.Value + size int64 + isAll bool + table string + cursor *sql.Rows + result graph.Value + err error +} + +func (it *Iterator) makeCursor() { + var cursor *sql.Rows + var err error + if it.cursor != nil { + it.cursor.Close() + } + if it.isAll { + if it.table == "quads" { + cursor, err = it.qs.db.Query(`SELECT subject, predicate, object, label FROM quads;`) + if err != nil { + glog.Errorln("Couldn't get cursor from SQL database: %v", err) + cursor = nil + } + } else { + glog.V(4).Infoln("sql: getting node query") + cursor, err = it.qs.db.Query(`SELECT node FROM + ( + SELECT subject FROM quads + UNION + SELECT predicate FROM quads + UNION + SELECT object FROM quads + UNION + SELECT label FROM quads + ) AS DistinctNodes (node) WHERE node IS NOT NULL;`) + if err != nil { + glog.Errorln("Couldn't get cursor from SQL database: %v", err) + cursor = nil + } + glog.V(4).Infoln("sql: got node query") + } + } else { + cursor, err = it.qs.db.Query( + fmt.Sprintf("SELECT subject, predicate, object, label FROM quads WHERE %s = $1;", it.dir.String()), it.val.(string)) + if err != nil { + glog.Errorln("Couldn't get cursor 
from SQL database: %v", err) + cursor = nil + } + } + it.cursor = cursor +} + +func NewIterator(qs *QuadStore, d quad.Direction, val graph.Value) *Iterator { + it := &Iterator{ + uid: iterator.NextUID(), + qs: qs, + dir: d, + size: -1, + val: val, + table: "quads", + isAll: false, + } + return it +} + +func NewAllIterator(qs *QuadStore, table string) *Iterator { + var size int64 + it := &Iterator{ + uid: iterator.NextUID(), + qs: qs, + dir: quad.Any, + size: size, + table: table, + isAll: true, + } + return it +} + +func (it *Iterator) UID() uint64 { + return it.uid +} + +func (it *Iterator) Reset() { + it.err = nil + it.Close() +} + +func (it *Iterator) Err() error { + return it.err +} + +func (it *Iterator) Close() error { + if it.cursor != nil { + err := it.cursor.Close() + if err != nil { + return err + } + it.cursor = nil + } + return nil +} + +func (it *Iterator) Tagger() *graph.Tagger { + return &it.tags +} + +func (it *Iterator) TagResults(dst map[string]graph.Value) { + for _, tag := range it.tags.Tags() { + dst[tag] = it.Result() + } + + for tag, value := range it.tags.Fixed() { + dst[tag] = value + } +} + +func (it *Iterator) Clone() graph.Iterator { + var m *Iterator + if it.isAll { + m = NewAllIterator(it.qs, it.table) + } else { + m = NewIterator(it.qs, it.dir, it.val) + } + m.tags.CopyFrom(it) + return m +} + +func (it *Iterator) SubIterators() []graph.Iterator { + return nil +} + +func (it *Iterator) Next() bool { + graph.NextLogIn(it) + if it.cursor == nil { + it.makeCursor() + } + if !it.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := it.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + it.err = err + } + it.cursor.Close() + return false + } + if it.table == "nodes" { + var node string + err := it.cursor.Scan(&node) + if err != nil { + glog.Errorf("Error nexting node iterator: %v", err) + it.err = err + return false + } + it.result = node + return true + } + var q quad.Quad + err := 
it.cursor.Scan(&q.Subject, &q.Predicate, &q.Object, &q.Label) + if err != nil { + glog.Errorf("Error scanning sql iterator: %v", err) + it.err = err + return false + } + it.result = q + return graph.NextLogOut(it, it.result, true) +} + +func (it *Iterator) Contains(v graph.Value) bool { + graph.ContainsLogIn(it, v) + if it.isAll { + return graph.ContainsLogOut(it, v, true) + } + q := v.(quad.Quad) + if q.Get(it.dir) == it.val.(string) { + return graph.ContainsLogOut(it, v, true) + } + return graph.ContainsLogOut(it, v, false) +} + +func (it *Iterator) Size() (int64, bool) { + if it.size != -1 { + return it.size, true + } + it.size = it.qs.sizeForIterator(it.isAll, it.dir, it.val.(string)) + return it.size, true +} + +func (it *Iterator) Result() graph.Value { + return it.result +} + +func (it *Iterator) NextPath() bool { + return false +} + +var sqlType graph.Type + +func init() { + sqlType = graph.RegisterIterator("sql") +} + +func Type() graph.Type { return sqlType } + +func (it *Iterator) Type() graph.Type { + if it.isAll { + return graph.All + } + return sqlType +} + +func (it *Iterator) Sorted() bool { return true } +func (it *Iterator) Optimize() (graph.Iterator, bool) { return it, false } + +func (it *Iterator) Describe() graph.Description { + size, _ := it.Size() + return graph.Description{ + UID: it.UID(), + Name: fmt.Sprintf("%s/%s", it.val, it.dir), + Type: it.Type(), + Size: size, + } +} + +func (it *Iterator) Stats() graph.IteratorStats { + size, _ := it.Size() + if it.table == "nodes" { + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 9999, + Size: size, + } + } + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 5, + Size: size, + } +} diff --git a/graph/sql/lru.go b/graph/sql/lru.go new file mode 100644 index 0000000..e3aca7f --- /dev/null +++ b/graph/sql/lru.go @@ -0,0 +1,63 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "container/list" +) + +// cache implements an LRU cache. +type cache struct { + cache map[string]*list.Element + priority *list.List + maxSize int +} + +type kv struct { + key string + value int64 +} + +func newCache(size int) *cache { + var lru cache + lru.maxSize = size + lru.priority = list.New() + lru.cache = make(map[string]*list.Element) + return &lru +} + +func (lru *cache) Put(key string, value int64) { + if _, ok := lru.Get(key); ok { + return + } + if len(lru.cache) == lru.maxSize { + lru.removeOldest() + } + lru.priority.PushFront(kv{key: key, value: value}) + lru.cache[key] = lru.priority.Front() +} + +func (lru *cache) Get(key string) (int64, bool) { + if element, ok := lru.cache[key]; ok { + lru.priority.MoveToFront(element) + return element.Value.(kv).value, true + } + return 0, false +} + +func (lru *cache) removeOldest() { + last := lru.priority.Remove(lru.priority.Back()) + delete(lru.cache, last.(kv).key) +} diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go new file mode 100644 index 0000000..e6b7162 --- /dev/null +++ b/graph/sql/quadstore.go @@ -0,0 +1,295 @@ +package sql + +import ( + "database/sql" + "fmt" + + "github.com/lib/pq" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +const QuadStoreType = "sql" + +func init() { + 
graph.RegisterQuadStore(QuadStoreType, true, newQuadStore, createSQLTables, nil) +} + +type QuadStore struct { + db *sql.DB + sqlFlavor string + size int64 + lru *cache +} + +func connectSQLTables(addr string, _ graph.Options) (*sql.DB, error) { + // TODO(barakmich): Parse options for more friendly addr, other SQLs. + conn, err := sql.Open("postgres", addr) + if err != nil { + glog.Errorf("Couldn't open database at %s: %#v", addr, err) + return nil, err + } + return conn, nil +} + +func createSQLTables(addr string, options graph.Options) error { + conn, err := connectSQLTables(addr, options) + if err != nil { + return err + } + tx, err := conn.Begin() + if err != nil { + glog.Errorf("Couldn't begin creation transaction: %s", err) + return err + } + + quadTable, err := tx.Exec(` + CREATE TABLE quads ( + subject TEXT NOT NULL, + predicate TEXT NOT NULL, + object TEXT NOT NULL, + label TEXT, + horizon BIGSERIAL PRIMARY KEY, + id BIGINT, + ts timestamp, + UNIQUE(subject, predicate, object, label) + );`) + if err != nil { + glog.Errorf("Cannot create quad table: %v", quadTable) + return err + } + index, err := tx.Exec(` + CREATE INDEX pos_index ON quads (predicate, object, subject) WITH (FILLFACTOR = 50); + CREATE INDEX osp_index ON quads (object, subject, predicate) WITH (FILLFACTOR = 50); + `) + if err != nil { + glog.Errorf("Cannot create indices: %v", index) + return err + } + tx.Commit() + return nil +} + +func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) { + var qs QuadStore + conn, err := connectSQLTables(addr, options) + if err != nil { + return nil, err + } + qs.db = conn + qs.sqlFlavor = "postgres" + qs.size = -1 + qs.lru = newCache(1024) + return &qs, nil +} + +func (qs *QuadStore) copyFrom(tx *sql.Tx, in []graph.Delta) error { + stmt, err := tx.Prepare(pq.CopyIn("quads", "subject", "predicate", "object", "label", "id", "ts")) + if err != nil { + return err + } + for _, d := range in { + _, err := stmt.Exec(d.Quad.Subject, 
d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp) + if err != nil { + glog.Errorf("couldn't prepare COPY statement: %v", err) + return err + } + } + _, err = stmt.Exec() + if err != nil { + return err + } + return stmt.Close() +} + +func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { + allAdds := true + for _, d := range in { + if d.Action != graph.Add { + allAdds = false + } + } + if allAdds { + return qs.copyFrom(tx, in) + } + + insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts) VALUES ($1, $2, $3, $4, $5, $6)`) + if err != nil { + glog.Errorf("Cannot prepare insert statement: %v", err) + return err + } + for _, d := range in { + switch d.Action { + case graph.Add: + _, err := insert.Exec(d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp) + if err != nil { + glog.Errorf("couldn't prepare INSERT statement: %v", err) + return err + } + //for _, dir := range quad.Directions { + //_, err := tx.Exec(` + //WITH upsert AS (UPDATE nodes SET size=size+1 WHERE node=$1 RETURNING *) + //INSERT INTO nodes (node, size) SELECT $1, 1 WHERE NOT EXISTS (SELECT * FROM UPSERT); + //`, d.Quad.Get(dir)) + //if err != nil { + //glog.Errorf("couldn't prepare upsert statement in direction %s: %v", dir, err) + //return err + //} + //} + case graph.Delete: + _, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`, + d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label) + if err != nil { + glog.Errorf("couldn't prepare DELETE statement: %v", err) + } + //for _, dir := range quad.Directions { + //tx.Exec(`UPDATE nodes SET size=size-1 WHERE node=$1;`, d.Quad.Get(dir)) + //} + default: + panic("unknown action") + } + } + return nil +} + +func (qs *QuadStore) ApplyDeltas(in []graph.Delta, _ graph.IgnoreOpts) error { + // TODO(barakmich): Support ignoreOpts? 
"ON CONFLICT IGNORE" + tx, err := qs.db.Begin() + if err != nil { + glog.Errorf("couldn't begin write transaction: %v", err) + return err + } + switch qs.sqlFlavor { + case "postgres": + err = qs.buildTxPostgres(tx, in) + if err != nil { + return err + } + default: + panic("no support for flavor: " + qs.sqlFlavor) + } + return tx.Commit() +} + +func (qs *QuadStore) Quad(val graph.Value) quad.Quad { + return val.(quad.Quad) +} + +func (qs *QuadStore) QuadIterator(d quad.Direction, val graph.Value) graph.Iterator { + return NewIterator(qs, d, val) +} + +func (qs *QuadStore) NodesAllIterator() graph.Iterator { + return NewAllIterator(qs, "nodes") +} + +func (qs *QuadStore) QuadsAllIterator() graph.Iterator { + return NewAllIterator(qs, "quads") +} + +func (qs *QuadStore) ValueOf(s string) graph.Value { + return s +} + +func (qs *QuadStore) NameOf(v graph.Value) string { + return v.(string) +} + +func (qs *QuadStore) Size() int64 { + // TODO(barakmich): Sync size with writes. + if qs.size != -1 { + return qs.size + } + c := qs.db.QueryRow("SELECT COUNT(*) FROM quads;") + err := c.Scan(&qs.size) + if err != nil { + glog.Errorf("Couldn't execute COUNT: %v", err) + return 0 + } + return qs.size +} + +func (qs *QuadStore) Horizon() graph.PrimaryKey { + var horizon int64 + err := qs.db.QueryRow("SELECT horizon FROM quads ORDER BY horizon DESC LIMIT 1;").Scan(&horizon) + if err != nil { + glog.Errorf("Couldn't execute horizon: %v", err) + return graph.NewSequentialKey(0) + } + return graph.NewSequentialKey(horizon) +} + +func (qs *QuadStore) FixedIterator() graph.FixedIterator { + return iterator.NewFixed(iterator.Identity) +} + +func (qs *QuadStore) Close() { + qs.db.Close() +} + +func (qs *QuadStore) QuadDirection(in graph.Value, d quad.Direction) graph.Value { + q := in.(quad.Quad) + return q.Get(d) +} + +func (qs *QuadStore) Type() string { + return QuadStoreType +} + +func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + switch it.Type() { 
+ case graph.LinksTo: + return qs.optimizeLinksTo(it.(*iterator.LinksTo)) + + } + return it, false +} + +func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { + subs := it.SubIterators() + if len(subs) != 1 { + return it, false + } + primary := subs[0] + if primary.Type() == graph.Fixed { + size, _ := primary.Size() + if size == 1 { + if !graph.Next(primary) { + panic("unexpected size during optimize") + } + val := primary.Result() + newIt := qs.QuadIterator(it.Direction(), val) + nt := newIt.Tagger() + nt.CopyFrom(it) + for _, tag := range primary.Tagger().Tags() { + nt.AddFixed(tag, val) + } + it.Close() + return newIt, true + } + } + return it, false +} + +func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) int64 { + var err error + if isAll { + return qs.Size() + } + if val, ok := qs.lru.Get(val + string(dir.Prefix())); ok { + return val + } + var size int64 + glog.V(4).Infoln("sql: getting size for select %s, %s", dir.String(), val) + err = qs.db.QueryRow( + fmt.Sprintf("SELECT count(*) FROM quads WHERE %s = $1;", dir.String()), val).Scan(&size) + if err != nil { + glog.Errorln("Error getting size from SQL database: %v", err) + return 0 + } + qs.lru.Put(val+string(dir.Prefix()), size) + return size +} diff --git a/quad/quad.go b/quad/quad.go index e448cf2..62e22ca 100644 --- a/quad/quad.go +++ b/quad/quad.go @@ -66,6 +66,8 @@ const ( Label ) +var Directions = []Direction{Subject, Predicate, Object, Label} + func (d Direction) Prefix() byte { switch d { case Any: diff --git a/query/gremlin/finals.go b/query/gremlin/finals.go index 583b8d0..0631d41 100644 --- a/query/gremlin/finals.go +++ b/query/gremlin/finals.go @@ -281,9 +281,9 @@ func (wk *worker) runIterator(it graph.Iterator) { if glog.V(2) { b, err := json.MarshalIndent(it.Describe(), "", " ") if err != nil { - glog.Infof("failed to format description: %v", err) + glog.V(2).Infof("failed to format description: %v", err) } else { - glog.Infof("%s", 
b) + glog.V(2).Infof("%s", b) } } for { From da391c3db74b8c1043a0e8a4f81f17f3bd7ce6b2 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Tue, 14 Jul 2015 21:43:09 -0400 Subject: [PATCH 02/18] optional index strategies --- graph/sql/iterator.go | 5 ++--- graph/sql/quadstore.go | 29 +++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/graph/sql/iterator.go b/graph/sql/iterator.go index af8a8a7..a0750ef 100644 --- a/graph/sql/iterator.go +++ b/graph/sql/iterator.go @@ -95,12 +95,11 @@ func NewIterator(qs *QuadStore, d quad.Direction, val graph.Value) *Iterator { } func NewAllIterator(qs *QuadStore, table string) *Iterator { - var size int64 it := &Iterator{ uid: iterator.NextUID(), qs: qs, dir: quad.Any, - size: size, + size: qs.Size(), table: table, isAll: true, } @@ -255,7 +254,7 @@ func (it *Iterator) Describe() graph.Description { func (it *Iterator) Stats() graph.IteratorStats { size, _ := it.Size() - if it.table == "nodes" { + if it.table == "nodes" || it.isAll { return graph.IteratorStats{ ContainsCost: 1, NextCost: 9999, diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index e6b7162..c52e942 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -61,10 +61,31 @@ func createSQLTables(addr string, options graph.Options) error { glog.Errorf("Cannot create quad table: %v", quadTable) return err } - index, err := tx.Exec(` - CREATE INDEX pos_index ON quads (predicate, object, subject) WITH (FILLFACTOR = 50); - CREATE INDEX osp_index ON quads (object, subject, predicate) WITH (FILLFACTOR = 50); - `) + idxStrat, _, err := options.StringKey("db_index_strategy") + factor, factorOk, err := options.IntKey("db_fill_factor") + if !factorOk { + factor = 50 + } + var index sql.Result + if idxStrat == "brin" { + index, err = tx.Exec(` + CREATE INDEX spo_index ON quads USING brin(subject) WITH (pages_per_range = 32); + CREATE INDEX pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32); + CREATE 
INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32); + `) + } else if idxStrat == "prefix" { + index, err = tx.Exec(fmt.Sprintf(` + CREATE INDEX spo_index ON quads (substr(subject, 0, 8)) WITH (FILLFACTOR = %d); + CREATE INDEX pos_index ON quads (substr(predicate, 0, 8)) WITH (FILLFACTOR = %d); + CREATE INDEX osp_index ON quads (substr(object, 0, 8)) WITH (FILLFACTOR = %d); + `, factor, factor, factor)) + } else { + index, err = tx.Exec(fmt.Sprintf(` + CREATE INDEX spo_index ON quads (subject, predicate, object) WITH (FILLFACTOR = %d); + CREATE INDEX pos_index ON quads (predicate, object, subject) WITH (FILLFACTOR = %d); + CREATE INDEX osp_index ON quads (object, subject, predicate) WITH (FILLFACTOR = %d); + `, factor, factor, factor)) + } if err != nil { glog.Errorf("Cannot create indices: %v", index) return err From 185e236f158794e2bcfe77ef852edbe8cdd11402 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 15 Jul 2015 18:29:55 -0400 Subject: [PATCH 03/18] attempt to build more interesting SQL queries Subcommits: fix old iterator, and flesh out new builder iterator fix contains for builder iterator Working replacement iterator --- graph/sql/builder_iterator.go | 582 +++++++++++++++++++++++++++++++++++++ graph/sql/builder_iterator_test.go | 110 +++++++ graph/sql/iterator.go | 7 +- graph/sql/quadstore.go | 2 +- integration/integration_test.go | 15 +- 5 files changed, 712 insertions(+), 4 deletions(-) create mode 100644 graph/sql/builder_iterator.go create mode 100644 graph/sql/builder_iterator_test.go diff --git a/graph/sql/builder_iterator.go b/graph/sql/builder_iterator.go new file mode 100644 index 0000000..f470845 --- /dev/null +++ b/graph/sql/builder_iterator.go @@ -0,0 +1,582 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sql
+
+import (
+	"database/sql"
+	"fmt"
+	"strings"
+
+	"github.com/barakmich/glog"
+	"github.com/google/cayley/graph"
+	"github.com/google/cayley/graph/iterator"
+	"github.com/google/cayley/quad"
+)
+
+var sqlBuilderType graph.Type
+
+func init() {
+	sqlBuilderType = graph.RegisterIterator("sqlbuilder")
+}
+
+type tableDir struct {
+	table string
+	dir   quad.Direction
+}
+
+func (td tableDir) String() string {
+	if td.table != "" {
+		return fmt.Sprintf("%s.%s", td.table, td.dir)
+	}
+	return "ERR" // reached only when no table name has been set on the pair
+}
+
+type clause interface {
+	toSQL() (string, []string) // SQL fragment with "?" placeholders, plus the values bound to them
+	getTables() map[string]bool // set of table aliases the fragment refers to
+}
+
+type baseClause struct {
+	pair      tableDir
+	strTarget []string
+	target    tableDir
+}
+
+func (b baseClause) toSQL() (string, []string) {
+	if len(b.strTarget) > 1 {
+		// TODO(barakmich): Sets of things, IN clause
+		return "", []string{}
+	}
+	if len(b.strTarget) == 0 {
+		return fmt.Sprintf("%s = %s", b.pair, b.target), []string{} // column-to-column comparison
+	}
+	return fmt.Sprintf("%s = ?", b.pair), []string{b.strTarget[0]} // column-to-literal comparison
+}
+
+func (b baseClause) getTables() map[string]bool {
+	out := make(map[string]bool)
+	if b.pair.table != "" {
+		out[b.pair.table] = true
+	}
+	if b.target.table != "" {
+		out[b.target.table] = true
+	}
+	return out
+}
+
+type joinClause struct {
+	left  clause
+	right clause
+	op    clauseOp
+}
+
+func (jc joinClause) toSQL() (string, []string) {
+	l, lstr := jc.left.toSQL()
+	r, rstr := jc.right.toSQL()
+	lstr = append(lstr, rstr...)
+	var op string
+	switch jc.op {
+	case andClause:
+		op = "AND"
+	case orClause:
+		op = "OR"
+	}
+	return fmt.Sprintf("(%s %s %s)", l, op, r), lstr // Sprintf, not Sprint: the verbs must be expanded
+}
+
+func (jc joinClause) getTables() map[string]bool {
+	m := jc.left.getTables()
+	for k := range jc.right.getTables() {
+		m[k] = true
+	}
+	return m
+}
+
+type tag struct {
+	pair tableDir
+	t    string
+}
+
+type statementType int
+
+const (
+	node statementType = iota
+	link
+)
+
+type clauseOp int
+
+const (
+	andClause clauseOp = iota
+	orClause
+)
+
+func (it *StatementIterator) canonicalizeWhere() (string, []string) {
+	var out []string
+	var values []string
+	for _, b := range it.buildWhere {
+		b.pair.table = it.tableName() // b is a copy; the stored clause keeps its empty table name
+		s, v := b.toSQL()
+		values = append(values, v...)
+		out = append(out, s)
+	}
+	return strings.Join(out, " AND "), values
+}
+
+func (it *StatementIterator) getTables() map[string]bool {
+	m := make(map[string]bool)
+	if it.where != nil {
+		m = it.where.getTables()
+	}
+	for _, t := range it.tags {
+		if t.pair.table != "" {
+			m[t.pair.table] = true
+		}
+	}
+	return m
+}
+
+func (it *StatementIterator) tableName() string {
+	return fmt.Sprintf("t_%d", it.uid)
+}
+
+func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, []string) {
+	str := "SELECT "
+	var t []string
+	if it.stType == link {
+		t = []string{
+			fmt.Sprintf("%s.subject", it.tableName()),
+			fmt.Sprintf("%s.predicate", it.tableName()),
+			fmt.Sprintf("%s.object", it.tableName()),
+			fmt.Sprintf("%s.label", it.tableName()),
+		}
+	} else {
+		t = []string{fmt.Sprintf("%s.%s as __execd", it.tableName(), it.dir)}
+	}
+	for _, v := range it.tags {
+		t = append(t, fmt.Sprintf("%s as %s", v.pair, v.t))
+	}
+	str += strings.Join(t, ", ")
+	str += " FROM "
+	t = []string{fmt.Sprintf("quads as %s", it.tableName())}
+	for k := range it.getTables() {
+		if k != it.tableName() {
+			t = append(t, fmt.Sprintf("quads as %s", k))
+		}
+	}
+	str += strings.Join(t, ", ")
+	str += " WHERE "
+	var values []string
+	var s string
+	if it.stType != node {
+		s, values = it.canonicalizeWhere()
+	}
+	if it.where != nil {
+		if s != "" {
+			s += " AND "
+		}
+		where, v2 := it.where.toSQL()
+		s += where
+		values = append(values, v2...)
+	}
+	str += s
+	if contains {
+		if it.stType == link {
+			q := v.(quad.Quad)
+			str += " AND "
+			t = []string{
+				fmt.Sprintf("%s.subject = ?", it.tableName()),
+				fmt.Sprintf("%s.predicate = ?", it.tableName()),
+				fmt.Sprintf("%s.object = ?", it.tableName()),
+				fmt.Sprintf("%s.label = ?", it.tableName()),
+			}
+			str += " " + strings.Join(t, " AND ") + " "
+			values = append(values, q.Subject)
+			values = append(values, q.Predicate)
+			values = append(values, q.Object)
+			values = append(values, q.Label)
+		} else {
+			str += fmt.Sprintf(" AND %s.%s = ? ", it.tableName(), it.dir)
+			values = append(values, v.(string))
+		}
+
+	}
+	if it.stType == node {
+		str += " ORDER BY __execd "
+	}
+	str += ";"
+	for i := 1; i <= len(values); i++ {
+		str = strings.Replace(str, "?", fmt.Sprintf("$%d", i), 1) // number the "?" placeholders left to right
+	}
+	return str, values
+}
+
+type StatementIterator struct {
+	uid uint64
+	qs  *QuadStore
+
+	// Only for links
+	buildWhere []baseClause
+
+	where       clause
+	tagger      graph.Tagger
+	tags        []tag
+	err         error
+	cursor      *sql.Rows
+	stType      statementType
+	dir         quad.Direction
+	result      map[string]string
+	resultIndex int
+	resultList  [][]string
+	resultNext  [][]string
+	cols        []string
+	resultQuad  quad.Quad
+	size        int64
+}
+
+func (it *StatementIterator) Clone() graph.Iterator {
+	m := &StatementIterator{
+		uid:        iterator.NextUID(),
+		qs:         it.qs,
+		buildWhere: it.buildWhere,
+		where:      it.where,
+		stType:     it.stType,
+		size:       it.size,
+	}
+	m.dir, m.tags = it.dir, append([]tag(nil), it.tags...) // the clone gets its own tag slice and the selected direction
+	m.tagger.CopyFrom(it)
+	return m
+}
+
+func NewStatementIterator(qs *QuadStore, d quad.Direction, val string) *StatementIterator {
+	it := &StatementIterator{
+		uid: iterator.NextUID(),
+		qs:  qs,
+		buildWhere: []baseClause{
+			baseClause{
+				pair:      tableDir{"", d},
+				strTarget: []string{val},
+			},
+		},
+		stType: link,
+		size:   -1, // -1 means "not computed yet"; see Size
+	}
+	return it
+}
+
+func (it *StatementIterator) UID() uint64 {
+	return it.uid
+}
+
+func (it *StatementIterator) Reset() {
+	it.err, it.resultList, it.resultNext, it.resultIndex = nil, nil, nil, 0 // drop buffered rows so a restart cannot replay them
+	it.Close()
+}
+
+func (it *StatementIterator) Err() error {
+	return it.err
+}
+
+func (it *StatementIterator) Close() error {
+	if it.cursor != nil {
+		err := it.cursor.Close()
+		if err != nil {
+			return err
+		}
+		it.cursor = nil
+	}
+	return nil
+}
+
+func (it *StatementIterator) Tagger() *graph.Tagger {
+	return &it.tagger
+}
+
+func (it *StatementIterator) Result() graph.Value {
+	if it.stType == node {
+		return it.result["__execd"]
+	}
+	return it.resultQuad
+}
+
+func (it *StatementIterator) TagResults(dst map[string]graph.Value) {
+	for tag, value := range it.result {
+		if tag == "__execd" {
+			for _, tag := range it.tagger.Tags() {
+				dst[tag] = value
+			}
+			continue
+		}
+		dst[tag] = value
+	}
+
+	for tag, value := range it.tagger.Fixed() {
+		dst[tag] = value
+	}
+}
+
+func (it *StatementIterator) Type() graph.Type {
+	return sqlBuilderType
+}
+
+func (it *StatementIterator) preFilter(v graph.Value) bool {
+	if it.stType == link {
+		q := v.(quad.Quad)
+		for _, b := range it.buildWhere {
+			if len(b.strTarget) == 0 {
+				continue
+			}
+			canFilter := true
+			for _, s := range b.strTarget {
+				if q.Get(b.pair.dir) == s {
+					canFilter = false
+					break
+				}
+			}
+			if canFilter {
+				return true // the quad matches none of this clause's literals, so no query is needed
+			}
+		}
+	}
+	return false
+}
+
+func (it *StatementIterator) Contains(v graph.Value) bool {
+	var err error
+	if it.preFilter(v) {
+		return false
+	}
+	q, values := it.buildQuery(true, v)
+	ivalues := make([]interface{}, 0, len(values))
+	for _, v := range values {
+		ivalues = append(ivalues, v)
+	}
+	it.cursor, err = it.qs.db.Query(q, ivalues...)
+	if err == nil {
+		it.cols, err = it.cursor.Columns()
+	}
+	if err != nil {
+		glog.Errorf("Couldn't run contains query: %v", err)
+		it.err = err
+		it.Close() // nil-safe: the cursor is nil when Query itself failed
+		return false
+	}
+	it.resultList = nil
+	for {
+		if !it.cursor.Next() {
+			glog.V(4).Infoln("sql: No next")
+			if err := it.cursor.Err(); err != nil {
+				glog.Errorf("Cursor error in SQL: %v", err)
+				it.err = err
+			}
+			break // the cursor is closed once, right after the loop
+		}
+		s, err := it.scan()
+		if err != nil {
+			it.err = err
+			it.cursor.Close()
+			return false
+		}
+		it.resultList = append(it.resultList, s)
+	}
+	it.cursor.Close()
+	it.cursor = nil
+	if len(it.resultList) != 0 {
+		it.resultIndex = 0
+		it.buildResult(0)
+		return true
+	}
+	return false
+}
+
+func (it *StatementIterator) SubIterators() []graph.Iterator {
+	return nil
+}
+
+func (it *StatementIterator) Sorted() bool                     { return false }
+func (it *StatementIterator) Optimize() (graph.Iterator, bool) { return it, false }
+
+func (it *StatementIterator) Size() (int64, bool) {
+
+	if it.size != -1 {
+		return it.size, true
+	}
+	if it.stType == node {
+		return it.qs.Size(), true
+	}
+	b := it.buildWhere[0]
+	it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0])
+	return it.size, true
+}
+
+func (it *StatementIterator) Describe() graph.Description {
+	size, _ := it.Size()
+	return graph.Description{
+		UID:  it.UID(),
+		Name: "SQL_QUERY",
+		Type: it.Type(),
+		Size: size,
+	}
+}
+
+func (it *StatementIterator) Stats() graph.IteratorStats {
+	size, _ := it.Size()
+	return graph.IteratorStats{
+		ContainsCost: 1,
+		NextCost:     5,
+		Size:         size,
+	}
+}
+
+func (it *StatementIterator) makeCursor() {
+	if it.cursor != nil {
+		it.cursor.Close()
+	}
+	q, values := it.buildQuery(false, nil)
+	ivalues := make([]interface{}, 0, len(values))
+	for _, v := range values {
+		ivalues = append(ivalues, v)
+	}
+	cursor, err := it.qs.db.Query(q, ivalues...)
+	if err != nil {
+		glog.Errorf("Couldn't get cursor from SQL database: %v", err)
+		cursor, it.err = nil, err // surface the failure through Err(); callers must check for a nil cursor
+	}
+	it.cursor = cursor
+}
+
+func (it *StatementIterator) NextPath() bool {
+	it.resultIndex += 1
+	if it.resultIndex >= len(it.resultList) {
+		return false
+	}
+	it.buildResult(it.resultIndex)
+	return true
+}
+
+func (it *StatementIterator) Next() bool {
+	var err error
+	graph.NextLogIn(it)
+	if it.cursor == nil {
+		it.makeCursor()
+		if it.cursor == nil { // the query failed; makeCursor already recorded the error
+			return false
+		}
+		if it.cols, err = it.cursor.Columns(); err != nil {
+			glog.Errorf("Couldn't get columns: %v", err)
+			it.err = err
+			it.cursor.Close()
+			return false
+		}
+		// iterate the first one
+		if !it.cursor.Next() {
+			glog.V(4).Infoln("sql: No next")
+			if err := it.cursor.Err(); err != nil {
+				glog.Errorf("Cursor error in SQL: %v", err)
+				it.err = err
+			}
+			it.cursor.Close()
+			return false
+		}
+		s, err := it.scan()
+		if err != nil {
+			it.err = err
+			it.cursor.Close()
+			return false
+		}
+		it.resultNext = append(it.resultNext, s)
+	}
+	if it.resultList != nil && it.resultNext == nil {
+		// We're on something and there's no next
+		return false
+	}
+	it.resultList = it.resultNext
+	it.resultNext = nil
+	it.resultIndex = 0
+	for {
+		if !it.cursor.Next() {
+			glog.V(4).Infoln("sql: No next")
+			if err := it.cursor.Err(); err != nil {
+				glog.Errorf("Cursor error in SQL: %v", err)
+				it.err = err
+			}
+			it.cursor.Close()
+			break
+		}
+		s, err := it.scan()
+		if err != nil {
+			it.err = err
+			it.cursor.Close()
+			return false
+		}
+		if it.stType == node {
+			if it.resultList[0][0] != s[0] {
+				it.resultNext = append(it.resultNext, s) // first row of the next group; keep it for the next call
+				break
+			} else {
+				it.resultList = append(it.resultList, s)
+			}
+		} else {
+			if it.resultList[0][0] == s[0] && it.resultList[0][1] == s[1] && it.resultList[0][2] == s[2] && it.resultList[0][3] == s[3] {
+				it.resultList = append(it.resultList, s)
+			} else {
+				it.resultNext = append(it.resultNext, s) // first row of the next group; keep it for the next call
+				break
+			}
+		}
+
+	}
+	if len(it.resultList) == 0 {
+		return graph.NextLogOut(it, nil, false)
+	}
+	it.buildResult(0)
+	return graph.NextLogOut(it, it.result, true)
+}
+
+func (it *StatementIterator) scan() ([]string, error) {
+	pointers := make([]interface{}, len(it.cols))
+	container := make([]string, len(it.cols))
+	for i := range pointers {
+		pointers[i] = &container[i]
+	}
+	err := it.cursor.Scan(pointers...)
+	if err != nil {
+		glog.Errorf("Error scanning iterator: %v", err)
+		it.err = err
+		return nil, err
+	}
+	return container, nil
+}
+
+func (it *StatementIterator) buildResult(i int) {
+	container := it.resultList[i]
+	if it.stType == node {
+		it.result = make(map[string]string)
+		for i, c := range it.cols {
+			it.result[c] = container[i]
+		}
+		return
+	}
+	var q quad.Quad
+	q.Subject = container[0]
+	q.Predicate = container[1]
+	q.Object = container[2]
+	q.Label = container[3]
+	it.resultQuad = q
+	it.result = make(map[string]string)
+	for i, c := range it.cols[4:] { // columns after the four quad fields are tag columns
+		it.result[c] = container[i+4]
+	}
+}
diff --git a/graph/sql/builder_iterator_test.go b/graph/sql/builder_iterator_test.go
new file mode 100644
index 0000000..cbb960d
--- /dev/null
+++ b/graph/sql/builder_iterator_test.go
@@ -0,0 +1,110 @@
+// Copyright 2015 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package sql + +import ( + "flag" + "fmt" + "testing" + + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +var dbpath = flag.String("dbpath", "", "Path to running DB") + +func TestSimpleSQL(t *testing.T) { + it := NewStatementIterator(nil, quad.Object, "cool") + s, v := it.buildQuery(false, nil) + fmt.Println(s, v) +} + +// Functional tests + +func TestQuadIteration(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + it := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + for graph.Next(it) { + fmt.Println(it.Result()) + } + it = NewStatementIterator(db.(*QuadStore), quad.Subject, "/en/casablanca_1942") + s, v := it.buildQuery(false, nil) + fmt.Println(s, v) + c := 0 + for graph.Next(it) { + fmt.Println(it.Result()) + c += 1 + } + if c != 18 { + t.Errorf("Not enough results, got %d expected 18") + } +} + +func TestNodeIteration(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + it := &StatementIterator{ + uid: iterator.NextUID(), + qs: db.(*QuadStore), + stType: node, + dir: quad.Object, + tags: []tag{ + tag{ + pair: tableDir{ + table: "t_4", + dir: quad.Subject, + }, + t: "x", + }, + }, + where: baseClause{ + pair: tableDir{ + table: "t_4", + dir: quad.Subject, + }, + strTarget: []string{"/en/casablanca_1942"}, + }, + } + s, v := it.buildQuery(false, nil) + it.Tagger().Add("id") + fmt.Println(s, v) + for graph.Next(it) { + fmt.Println(it.Result()) + out := make(map[string]graph.Value) + it.TagResults(out) + for k, v := range out { + fmt.Printf("%s: %v\n", k, v.(string)) + } + } + contains := it.Contains("Casablanca") + s, v = it.buildQuery(true, "Casablanca") + fmt.Println(s, v) + it.Tagger().Add("id") + if !contains { + t.Error("Didn't contain Casablanca") + } +} diff --git a/graph/sql/iterator.go 
b/graph/sql/iterator.go index a0750ef..1482eaa 100644 --- a/graph/sql/iterator.go +++ b/graph/sql/iterator.go @@ -199,10 +199,12 @@ func (it *Iterator) Next() bool { func (it *Iterator) Contains(v graph.Value) bool { graph.ContainsLogIn(it, v) if it.isAll { + it.result = v return graph.ContainsLogOut(it, v, true) } q := v.(quad.Quad) if q.Get(it.dir) == it.val.(string) { + it.result = v return graph.ContainsLogOut(it, v, true) } return graph.ContainsLogOut(it, v, false) @@ -217,6 +219,9 @@ func (it *Iterator) Size() (int64, bool) { } func (it *Iterator) Result() graph.Value { + if it.result == nil { + glog.Fatalln("result was nil", it) + } return it.result } @@ -239,7 +244,7 @@ func (it *Iterator) Type() graph.Type { return sqlType } -func (it *Iterator) Sorted() bool { return true } +func (it *Iterator) Sorted() bool { return false } func (it *Iterator) Optimize() (graph.Iterator, bool) { return it, false } func (it *Iterator) Describe() graph.Description { diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index c52e942..26d1ff8 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -200,7 +200,7 @@ func (qs *QuadStore) Quad(val graph.Value) quad.Quad { } func (qs *QuadStore) QuadIterator(d quad.Direction, val graph.Value) graph.Iterator { - return NewIterator(qs, d, val) + return NewStatementIterator(qs, d, val.(string)) } func (qs *QuadStore) NodesAllIterator() graph.Iterator { diff --git a/integration/integration_test.go b/integration/integration_test.go index 4b5b726..05ddac0 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -37,12 +37,14 @@ import ( _ "github.com/google/cayley/graph/leveldb" _ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/mongo" + _ "github.com/google/cayley/graph/sql" // Load writer registry _ "github.com/google/cayley/writer" ) var backend = flag.String("backend", "memstore", "Which backend to test. 
Loads test data to /tmp if not present.") +var backendPath = flag.String("backend_path", "", "Path to the chosen backend. Will have sane testing defaults if not specified") var benchmarkQueries = []struct { message string @@ -422,6 +424,7 @@ var ( ) func prepare(t testing.TB) { + var remote bool cfg.DatabaseType = *backend switch *backend { case "memstore": @@ -436,14 +439,21 @@ func prepare(t testing.TB) { cfg.DatabaseOptions = map[string]interface{}{ "database_name": "cayley_test", // provide a default test database } + remote = true + case "sql": + cfg.DatabasePath = "postgres://localhost/cayley_test" + remote = true default: t.Fatalf("Untestable backend store %s", *backend) } + if *backendPath != "" { + cfg.DatabasePath = *backendPath + } var err error create.Do(func() { needsLoad := true - if graph.IsPersistent(cfg.DatabaseType) { + if graph.IsPersistent(cfg.DatabaseType) && !remote { if _, err := os.Stat(cfg.DatabasePath); os.IsNotExist(err) { err = db.Init(cfg) if err != nil { @@ -459,7 +469,7 @@ func prepare(t testing.TB) { t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) } - if needsLoad { + if needsLoad && !remote { err = internal.Load(handle.QuadWriter, cfg, "../data/30kmoviedata.nq.gz", "cquad") if err != nil { t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) @@ -524,6 +534,7 @@ func checkQueries(t *testing.T) { if testing.Short() && test.long { continue } + fmt.Printf("Now testing %s\n", test.message) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) _, err := ses.Parse(test.query) if err != nil { From 621acae945522007611190e8582f825803abf52e Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 22 Jul 2015 19:11:59 -0400 Subject: [PATCH 04/18] Optimize by collapsing trees into single SQL queries --- graph/iterator/and_iterator_optimize.go | 3 +- graph/iterator/hasa_iterator.go | 8 + graph/iterator/not_iterator.go | 1 + graph/sql/builder_iterator.go | 89 ++++++++-- graph/sql/optimizers.go | 287 
++++++++++++++++++++++++++++++++ graph/sql/optimizers_test.go | 128 ++++++++++++++ graph/sql/quadstore.go | 35 ---- 7 files changed, 501 insertions(+), 50 deletions(-) create mode 100644 graph/sql/optimizers.go create mode 100644 graph/sql/optimizers_test.go diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index db841dd..cec5960 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -103,6 +103,7 @@ func (it *And) Optimize() (graph.Iterator, bool) { newReplacement, hasOne := it.qs.OptimizeIterator(newAnd) if hasOne { newAnd.Close() + glog.V(3).Infoln(it.UID(), "became", newReplacement.UID(), "from quadstore") return newReplacement, true } } @@ -330,7 +331,7 @@ func materializeIts(its []graph.Iterator) []graph.Iterator { out = append(out, its[0]) for _, it := range its[1:] { stats := it.Stats() - if stats.Size*stats.NextCost < (stats.ContainsCost * (1 + (stats.Size / (allStats.Size + 1)))) { + if false && stats.Size*stats.NextCost < (stats.ContainsCost*(1+(stats.Size/(allStats.Size+1)))) { if graph.Height(it, graph.Materialize) > 10 { out = append(out, NewMaterialize(it)) continue diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index 9547c54..32be43b 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -105,6 +105,14 @@ func (it *HasA) Optimize() (graph.Iterator, bool) { return it.primaryIt, true } } + // Ask the graph.QuadStore if we can be replaced. Often times, this is a great + // optimization opportunity (there's a fixed iterator underneath us, for + // example). 
+ newReplacement, hasOne := it.qs.OptimizeIterator(it) + if hasOne { + it.Close() + return newReplacement, true + } return it, false } diff --git a/graph/iterator/not_iterator.go b/graph/iterator/not_iterator.go index 4d393e8..6813a5d 100644 --- a/graph/iterator/not_iterator.go +++ b/graph/iterator/not_iterator.go @@ -140,6 +140,7 @@ func (it *Not) Optimize() (graph.Iterator, bool) { if optimized { it.primaryIt = optimizedPrimaryIt } + it.primaryIt = NewMaterialize(it.primaryIt) return it, false } diff --git a/graph/sql/builder_iterator.go b/graph/sql/builder_iterator.go index f470845..867789d 100644 --- a/graph/sql/builder_iterator.go +++ b/graph/sql/builder_iterator.go @@ -46,6 +46,7 @@ func (td tableDir) String() string { type clause interface { toSQL() (string, []string) getTables() map[string]bool + size() int } type baseClause struct { @@ -65,6 +66,8 @@ func (b baseClause) toSQL() (string, []string) { return fmt.Sprintf("%s = ?", b.pair), []string{b.strTarget[0]} } +func (b baseClause) size() int { return 1 } + func (b baseClause) getTables() map[string]bool { out := make(map[string]bool) if b.pair.table != "" { @@ -82,7 +85,27 @@ type joinClause struct { op clauseOp } +func (jc joinClause) size() int { + size := 0 + if jc.left != nil { + size += jc.left.size() + } + if jc.right != nil { + size += jc.right.size() + } + return size +} + func (jc joinClause) toSQL() (string, []string) { + if jc.left == nil { + if jc.right == nil { + return "", []string{} + } + return jc.right.toSQL() + } + if jc.right == nil { + return jc.left.toSQL() + } l, lstr := jc.left.toSQL() r, rstr := jc.right.toSQL() lstr = append(lstr, rstr...) 
@@ -93,13 +116,20 @@ func (jc joinClause) toSQL() (string, []string) { case orClause: op = "OR" } - return fmt.Sprint("(%s %s %s)", l, op, r), lstr + return fmt.Sprintf("(%s %s %s)", l, op, r), lstr } func (jc joinClause) getTables() map[string]bool { - m := jc.left.getTables() - for k, _ := range jc.right.getTables() { - m[k] = true + var m map[string]bool + if jc.left != nil { + m = jc.left.getTables() + } else { + m = make(map[string]bool) + } + if jc.right != nil { + for k, _ := range jc.right.getTables() { + m[k] = true + } } return m } @@ -166,8 +196,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ t = []string{fmt.Sprintf("%s.%s as __execd", it.tableName(), it.dir)} } for _, v := range it.tags { + if v.pair.table == "" { + v.pair.table = it.tableName() + } t = append(t, fmt.Sprintf("%s as %s", v.pair, v.t)) } + for _, v := range it.tagger.Tags() { + t = append(t, fmt.Sprintf("%s as %s", tableDir{it.tableName(), it.dir}, v)) + } str += strings.Join(t, ", ") str += " FROM " t = []string{fmt.Sprintf("quads as %s", it.tableName())} @@ -180,7 +216,7 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ str += " WHERE " var values []string var s string - if it.stType != node { + if len(it.buildWhere) != 0 { s, values = it.canonicalizeWhere() } if it.where != nil { @@ -191,28 +227,31 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ s += where values = append(values, v2...) 
} - str += s + if contains { + if s != "" { + s += " AND " + } if it.stType == link { q := v.(quad.Quad) - str += " AND " t = []string{ fmt.Sprintf("%s.subject = ?", it.tableName()), fmt.Sprintf("%s.predicate = ?", it.tableName()), fmt.Sprintf("%s.object = ?", it.tableName()), fmt.Sprintf("%s.label = ?", it.tableName()), } - str += " " + strings.Join(t, " AND ") + " " + s += " " + strings.Join(t, " AND ") + " " values = append(values, q.Subject) values = append(values, q.Predicate) values = append(values, q.Object) values = append(values, q.Label) } else { - str += fmt.Sprintf(" AND %s.%s = ? ", it.tableName(), it.dir) + s += fmt.Sprintf("%s.%s = ? ", it.tableName(), it.dir) values = append(values, v.(string)) } } + str += s if it.stType == node { str += " ORDER BY __execd " } @@ -220,6 +259,14 @@ func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, [ for i := 1; i <= len(values); i++ { str = strings.Replace(str, "?", fmt.Sprintf("$%d", i), 1) } + glog.V(2).Infoln(str) + if glog.V(4) { + dstr := str + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, fmt.Sprintf("$%d", i), fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } return str, values } @@ -254,6 +301,7 @@ func (it *StatementIterator) Clone() graph.Iterator { where: it.where, stType: it.stType, size: it.size, + dir: it.dir, } copy(it.tags, m.tags) m.tagger.CopyFrom(it) @@ -364,6 +412,12 @@ func (it *StatementIterator) Contains(v graph.Value) bool { ivalues = append(ivalues, v) } it.cursor, err = it.qs.db.Query(q, ivalues...) 
+ if err != nil { + glog.Errorf("Couldn't make query: %v", err) + it.err = err + it.cursor.Close() + return false + } it.cols, err = it.cursor.Columns() if err != nil { glog.Errorf("Couldn't get columns") @@ -414,10 +468,17 @@ func (it *StatementIterator) Size() (int64, bool) { return it.size, true } if it.stType == node { - return it.qs.Size(), true + if it.where == nil { + return it.qs.Size() / int64(len(it.buildWhere)+1), true + } + return it.qs.Size() / int64(it.where.size()+len(it.buildWhere)+1), true } b := it.buildWhere[0] - it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0]) + if len(b.strTarget) > 0 { + it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0]) + } else { + return it.qs.Size(), false + } return it.size, true } @@ -425,7 +486,7 @@ func (it *StatementIterator) Describe() graph.Description { size, _ := it.Size() return graph.Description{ UID: it.UID(), - Name: "SQL_QUERY", + Name: fmt.Sprintf("SQL_QUERY: %#v", it), Type: it.Type(), Size: size, } @@ -451,7 +512,7 @@ func (it *StatementIterator) makeCursor() { } cursor, err := it.qs.db.Query(q, ivalues...) if err != nil { - glog.Errorln("Couldn't get cursor from SQL database: %v", err) + glog.Errorf("Couldn't get cursor from SQL database: %v", err) cursor = nil } it.cursor = cursor @@ -542,7 +603,7 @@ func (it *StatementIterator) Next() bool { return graph.NextLogOut(it, nil, false) } it.buildResult(0) - return graph.NextLogOut(it, it.result, true) + return graph.NextLogOut(it, it.Result(), true) } func (it *StatementIterator) scan() ([]string, error) { diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go new file mode 100644 index 0000000..c07741b --- /dev/null +++ b/graph/sql/optimizers.go @@ -0,0 +1,287 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "errors" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +func intersect(a *StatementIterator, b *StatementIterator) (*StatementIterator, error) { + if a.stType != b.stType { + return nil, errors.New("Cannot combine SQL iterators of two different types") + } + min := a.size + if b.size < a.size { + min = b.size + } + var where clause + if a.where == nil { + if b.where == nil { + where = nil + } + where = b.where + } else { + if b.where == nil { + where = a.where + } + where = joinClause{a.where, b.where, andClause} + } + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + buildWhere: append(a.buildWhere, b.buildWhere...), + tags: append(a.tags, b.tags...), + where: where, + stType: a.stType, + size: min, + dir: a.dir, + } + out.tagger.CopyFrom(a) + out.tagger.CopyFrom(b) + if out.stType == node { + out.buildWhere = append(out.buildWhere, baseClause{ + pair: tableDir{"", a.dir}, + target: tableDir{b.tableName(), b.dir}, + }) + } + return out, nil +} + +func hasa(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { + if a.stType != link { + return nil, errors.New("Can't take the HASA of a link SQL iterator") + } + + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + stType: node, + dir: d, + } + where := a.where + for _, w := range a.buildWhere { + w.pair.table = out.tableName() + wherenew := joinClause{where, w, andClause} + where = wherenew + } + out.where = where + 
//out := &StatementIterator{ + //uid: iterator.NextUID(), + //qs: a.qs, + //stType: node, + //dir: d, + //buildWhere: a.buildWhere, + //where: a.where, + //size: -1, + //} + for k, v := range a.tagger.Fixed() { + out.tagger.AddFixed(k, v) + } + var tags []tag + for _, t := range a.tagger.Tags() { + tags = append(tags, tag{ + pair: tableDir{ + table: out.tableName(), + dir: quad.Any, + }, + t: t, + }) + } + out.tags = append(tags, a.tags...) + return out, nil +} + +func linksto(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { + if a.stType != node { + return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + } + out := &StatementIterator{ + uid: iterator.NextUID(), + qs: a.qs, + stType: link, + dir: d, + size: -1, + } + where := a.where + for _, w := range a.buildWhere { + w.pair.table = a.tableName() + wherenew := joinClause{where, w, andClause} + where = wherenew + } + + out.where = where + out.buildWhere = []baseClause{ + baseClause{ + pair: tableDir{ + dir: d, + }, + target: tableDir{ + table: a.tableName(), + dir: a.dir, + }, + }, + } + var tags []tag + for _, t := range a.tagger.Tags() { + tags = append(tags, tag{ + pair: tableDir{ + table: a.tableName(), + dir: a.dir, + }, + t: t, + }) + } + for k, v := range a.tagger.Fixed() { + out.tagger.AddFixed(k, v) + } + for _, t := range a.tags { + if t.pair.table == "" { + t.pair.table = a.tableName() + } + tags = append(tags, t) + } + out.tags = tags + return out, nil +} + +func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + switch it.Type() { + case graph.LinksTo: + return qs.optimizeLinksTo(it.(*iterator.LinksTo)) + case graph.HasA: + return qs.optimizeHasA(it.(*iterator.HasA)) + case graph.And: + return qs.optimizeAnd(it.(*iterator.And)) + } + return it, false +} + +func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { + subs := it.SubIterators() + if len(subs) != 1 { + return it, false + } + primary := subs[0] + 
switch primary.Type() { + case graph.Fixed: + size, _ := primary.Size() + if size == 1 { + if !graph.Next(primary) { + panic("unexpected size during optimize") + } + val := primary.Result() + newIt := qs.QuadIterator(it.Direction(), val) + nt := newIt.Tagger() + nt.CopyFrom(it) + for _, tag := range primary.Tagger().Tags() { + nt.AddFixed(tag, val) + } + it.Close() + return newIt, true + } + case sqlBuilderType: + newit, err := linksto(primary.(*StatementIterator), it.Direction()) + if err != nil { + glog.Errorln(err) + return it, false + } + newit.Tagger().CopyFrom(it) + return newit, true + case graph.All: + newit := &StatementIterator{ + uid: iterator.NextUID(), + qs: qs, + stType: link, + size: qs.Size(), + } + for _, t := range primary.Tagger().Tags() { + newit.tags = append(newit.tags, tag{ + pair: tableDir{"", it.Direction()}, + t: t, + }) + } + for k, v := range primary.Tagger().Fixed() { + newit.tagger.AddFixed(k, v) + } + newit.tagger.CopyFrom(it) + + return newit, true + } + return it, false +} + +func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) { + subs := it.SubIterators() + var unusedIts []graph.Iterator + var newit *StatementIterator + newit = nil + changed := false + var err error + + for _, it := range subs { + if it.Type() == sqlBuilderType { + if newit == nil { + newit = it.(*StatementIterator) + } else { + changed = true + newit, err = intersect(newit, it.(*StatementIterator)) + if err != nil { + glog.Error(err) + return it, false + } + } + } else { + unusedIts = append(unusedIts, it) + } + } + + if !changed { + return it, false + } + if len(unusedIts) == 0 { + newit.tagger.CopyFrom(it) + return newit, true + } + newAnd := iterator.NewAnd(qs) + newAnd.Tagger().CopyFrom(it) + newAnd.AddSubIterator(newit) + for _, i := range unusedIts { + newAnd.AddSubIterator(i) + } + return newAnd.Optimize() +} + +func (qs *QuadStore) optimizeHasA(it *iterator.HasA) (graph.Iterator, bool) { + subs := it.SubIterators() + if len(subs) != 1 
{ + return it, false + } + primary := subs[0] + if primary.Type() == sqlBuilderType { + newit, err := hasa(primary.(*StatementIterator), it.Direction()) + if err != nil { + glog.Errorln(err) + return it, false + } + newit.Tagger().CopyFrom(it) + return newit, true + } + return it, false +} diff --git a/graph/sql/optimizers_test.go b/graph/sql/optimizers_test.go new file mode 100644 index 0000000..916fa25 --- /dev/null +++ b/graph/sql/optimizers_test.go @@ -0,0 +1,128 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sql + +import ( + "fmt" + "testing" + + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +func TestBuildIntersect(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it, err := intersect(a, b) + if err != nil { + t.Error(err) + } + s, v := it.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestBuildHasa(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + a.tagger.Add("foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Object) + if err != nil { + t.Error(err) + } + s, v := it2.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestBuildLinksTo(t *testing.T) { + a := NewStatementIterator(nil, quad.Subject, "Foo") + b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Object) + it2.tagger.Add("foo") + if err != nil { + t.Error(err) + } + it3, err := linksto(it2, quad.Subject) + if err != nil { + t.Error(err) + } + s, v := it3.buildQuery(false, nil) + fmt.Println(s, v) +} + +func TestInterestingQuery(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + a := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + b := NewStatementIterator(db.(*QuadStore), quad.Predicate, "name") + it1, err := intersect(a, b) + if err != nil { + t.Error(err) + } + it2, err := hasa(it1, quad.Subject) + if err != nil { + t.Error(err) + } + it2.Tagger().Add("hb") + it3, err := linksto(it2, quad.Object) + if err != nil { + t.Error(err) + } + b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor") + it4, err := intersect(it3, b) + if err != nil { + t.Error(err) + } + it5, err := hasa(it4, 
quad.Subject) + if err != nil { + t.Error(err) + } + it6, err := linksto(it5, quad.Object) + if err != nil { + t.Error(err) + } + b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring") + it7, err := intersect(it6, b) + if err != nil { + t.Error(err) + } + it8, err := hasa(it7, quad.Subject) + if err != nil { + t.Error(err) + } + s, v := it8.buildQuery(false, nil) + it8.Tagger().Add("id") + fmt.Println(s, v) + for graph.Next(it8) { + fmt.Println(it8.Result()) + out := make(map[string]graph.Value) + it8.TagResults(out) + for k, v := range out { + fmt.Printf("%s: %v\n", k, v.(string)) + } + } +} diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 26d1ff8..0cad60d 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -260,41 +260,6 @@ func (qs *QuadStore) Type() string { return QuadStoreType } -func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { - switch it.Type() { - case graph.LinksTo: - return qs.optimizeLinksTo(it.(*iterator.LinksTo)) - - } - return it, false -} - -func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool) { - subs := it.SubIterators() - if len(subs) != 1 { - return it, false - } - primary := subs[0] - if primary.Type() == graph.Fixed { - size, _ := primary.Size() - if size == 1 { - if !graph.Next(primary) { - panic("unexpected size during optimize") - } - val := primary.Result() - newIt := qs.QuadIterator(it.Direction(), val) - nt := newIt.Tagger() - nt.CopyFrom(it) - for _, tag := range primary.Tagger().Tags() { - nt.AddFixed(tag, val) - } - it.Close() - return newIt, true - } - } - return it, false -} - func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) int64 { var err error if isAll { From 7153a766c1fe02d171d5b15deda03f42c19d1146 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Thu, 23 Jul 2015 16:01:20 -0400 Subject: [PATCH 05/18] Add new builder iterators, v2 Subcommits: link iterator next/contains 
implement sql_node_iterator next/buildsql fix optimizers --- graph/sql/optimizers.go | 242 +++++++---------- graph/sql/optimizers_test.go | 42 +-- graph/sql/quadstore.go | 2 +- graph/sql/sql_link_iterator.go | 525 ++++++++++++++++++++++++++++++++++++ graph/sql/sql_link_iterator_test.go | 87 ++++++ graph/sql/sql_node_iterator.go | 435 ++++++++++++++++++++++++++++++ integration/integration_test.go | 9 +- 7 files changed, 1173 insertions(+), 169 deletions(-) create mode 100644 graph/sql/sql_link_iterator.go create mode 100644 graph/sql/sql_link_iterator_test.go create mode 100644 graph/sql/sql_node_iterator.go diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index c07741b..a109d6a 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -23,141 +23,86 @@ import ( "github.com/google/cayley/quad" ) -func intersect(a *StatementIterator, b *StatementIterator) (*StatementIterator, error) { - if a.stType != b.stType { - return nil, errors.New("Cannot combine SQL iterators of two different types") - } - min := a.size - if b.size < a.size { - min = b.size - } - var where clause - if a.where == nil { - if b.where == nil { - where = nil +func intersect(a graph.Iterator, b graph.Iterator) (graph.Iterator, error) { + if anew, ok := a.(*SQLNodeIterator); ok { + if bnew, ok := b.(*SQLNodeIterator); ok { + return intersectNode(anew, bnew) } - where = b.where + } else if anew, ok := a.(*SQLLinkIterator); ok { + if bnew, ok := b.(*SQLLinkIterator); ok { + return intersectLink(anew, bnew) + } + } else { - if b.where == nil { - where = a.where - } - where = joinClause{a.where, b.where, andClause} + return nil, errors.New("Unknown iterator types") } - out := &StatementIterator{ - uid: iterator.NextUID(), - qs: a.qs, - buildWhere: append(a.buildWhere, b.buildWhere...), - tags: append(a.tags, b.tags...), - where: where, - stType: a.stType, - size: min, - dir: a.dir, - } - out.tagger.CopyFrom(a) - out.tagger.CopyFrom(b) - if out.stType == node { - out.buildWhere = 
append(out.buildWhere, baseClause{ - pair: tableDir{"", a.dir}, - target: tableDir{b.tableName(), b.dir}, - }) - } - return out, nil + return nil, errors.New("Cannot combine SQL iterators of two different types") } -func hasa(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { - if a.stType != link { +func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator) (graph.Iterator, error) { + m := &SQLNodeIterator{ + uid: iterator.NextUID(), + qs: a.qs, + tableName: newTableName(), + linkIts: append(a.linkIts, b.linkIts...), + tagdirs: append(a.tagdirs, b.tagdirs...), + } + m.Tagger().CopyFrom(a) + m.Tagger().CopyFrom(b) + return m, nil +} + +func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator) (graph.Iterator, error) { + m := &SQLLinkIterator{ + uid: iterator.NextUID(), + qs: a.qs, + tableName: newTableName(), + nodeIts: append(a.nodeIts, b.nodeIts...), + constraints: append(a.constraints, b.constraints...), + } + m.Tagger().CopyFrom(a) + m.Tagger().CopyFrom(b) + return m, nil +} + +func hasa(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { + a, ok := aIn.(*SQLLinkIterator) + if !ok { return nil, errors.New("Can't take the HASA of a link SQL iterator") } - out := &StatementIterator{ - uid: iterator.NextUID(), - qs: a.qs, - stType: node, - dir: d, - } - where := a.where - for _, w := range a.buildWhere { - w.pair.table = out.tableName() - wherenew := joinClause{where, w, andClause} - where = wherenew - } - out.where = where - //out := &StatementIterator{ - //uid: iterator.NextUID(), - //qs: a.qs, - //stType: node, - //dir: d, - //buildWhere: a.buildWhere, - //where: a.where, - //size: -1, - //} - for k, v := range a.tagger.Fixed() { - out.tagger.AddFixed(k, v) - } - var tags []tag - for _, t := range a.tagger.Tags() { - tags = append(tags, tag{ - pair: tableDir{ - table: out.tableName(), - dir: quad.Any, - }, - t: t, - }) - } - out.tags = append(tags, a.tags...) 
- return out, nil -} - -func linksto(a *StatementIterator, d quad.Direction) (*StatementIterator, error) { - if a.stType != node { - return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") - } - out := &StatementIterator{ - uid: iterator.NextUID(), - qs: a.qs, - stType: link, - dir: d, - size: -1, - } - where := a.where - for _, w := range a.buildWhere { - w.pair.table = a.tableName() - wherenew := joinClause{where, w, andClause} - where = wherenew - } - - out.where = where - out.buildWhere = []baseClause{ - baseClause{ - pair: tableDir{ + out := &SQLNodeIterator{ + uid: iterator.NextUID(), + qs: a.qs, + tableName: newTableName(), + linkIts: []sqlItDir{ + sqlItDir{ + it: a, dir: d, }, - target: tableDir{ - table: a.tableName(), - dir: a.dir, - }, }, } - var tags []tag - for _, t := range a.tagger.Tags() { - tags = append(tags, tag{ - pair: tableDir{ - table: a.tableName(), - dir: a.dir, + return out, nil +} + +func linksto(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { + a, ok := aIn.(*SQLNodeIterator) + if !ok { + return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + } + + out := &SQLLinkIterator{ + uid: iterator.NextUID(), + qs: a.qs, + tableName: newTableName(), + nodeIts: []sqlItDir{ + sqlItDir{ + it: a, + dir: d, }, - t: t, - }) + }, } - for k, v := range a.tagger.Fixed() { - out.tagger.AddFixed(k, v) - } - for _, t := range a.tags { - if t.pair.table == "" { - t.pair.table = a.tableName() - } - tags = append(tags, t) - } - out.tags = tags + return out, nil } @@ -196,33 +141,34 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool it.Close() return newIt, true } - case sqlBuilderType: - newit, err := linksto(primary.(*StatementIterator), it.Direction()) + case sqlNodeType: + //p := primary.(*SQLNodeIterator) + newit, err := linksto(primary, it.Direction()) if err != nil { glog.Errorln(err) return it, false } newit.Tagger().CopyFrom(it) return newit, true - case graph.All: - 
newit := &StatementIterator{ - uid: iterator.NextUID(), - qs: qs, - stType: link, - size: qs.Size(), - } - for _, t := range primary.Tagger().Tags() { - newit.tags = append(newit.tags, tag{ - pair: tableDir{"", it.Direction()}, - t: t, - }) - } - for k, v := range primary.Tagger().Fixed() { - newit.tagger.AddFixed(k, v) - } - newit.tagger.CopyFrom(it) + //case graph.All: + //newit := &StatementIterator{ + //uid: iterator.NextUID(), + //qs: qs, + //stType: link, + //size: qs.Size(), + //} + //for _, t := range primary.Tagger().Tags() { + //newit.tags = append(newit.tags, tag{ + //pair: tableDir{"", it.Direction()}, + //t: t, + //}) + //} + //for k, v := range primary.Tagger().Fixed() { + //newit.tagger.AddFixed(k, v) + //} + //newit.tagger.CopyFrom(it) - return newit, true + //return newit, true } return it, false } @@ -230,18 +176,18 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) { subs := it.SubIterators() var unusedIts []graph.Iterator - var newit *StatementIterator + var newit graph.Iterator newit = nil changed := false var err error for _, it := range subs { - if it.Type() == sqlBuilderType { + if it.Type() == sqlLinkType || it.Type() == sqlNodeType { if newit == nil { - newit = it.(*StatementIterator) + newit = it } else { changed = true - newit, err = intersect(newit, it.(*StatementIterator)) + newit, err = intersect(newit, it) if err != nil { glog.Error(err) return it, false @@ -256,7 +202,7 @@ func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) { return it, false } if len(unusedIts) == 0 { - newit.tagger.CopyFrom(it) + newit.Tagger().CopyFrom(it) return newit, true } newAnd := iterator.NewAnd(qs) @@ -274,8 +220,8 @@ func (qs *QuadStore) optimizeHasA(it *iterator.HasA) (graph.Iterator, bool) { return it, false } primary := subs[0] - if primary.Type() == sqlBuilderType { - newit, err := hasa(primary.(*StatementIterator), 
it.Direction()) + if primary.Type() == sqlLinkType { + newit, err := hasa(primary, it.Direction()) if err != nil { glog.Errorln(err) return it, false diff --git a/graph/sql/optimizers_test.go b/graph/sql/optimizers_test.go index 916fa25..229b91e 100644 --- a/graph/sql/optimizers_test.go +++ b/graph/sql/optimizers_test.go @@ -23,41 +23,43 @@ import ( ) func TestBuildIntersect(t *testing.T) { - a := NewStatementIterator(nil, quad.Subject, "Foo") - b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + a := NewSQLLinkIterator(nil, quad.Subject, "Foo") + b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") it, err := intersect(a, b) + i := it.(*SQLLinkIterator) if err != nil { t.Error(err) } - s, v := it.buildQuery(false, nil) + s, v := i.buildSQL(true, nil) fmt.Println(s, v) } func TestBuildHasa(t *testing.T) { - a := NewStatementIterator(nil, quad.Subject, "Foo") + a := NewSQLLinkIterator(nil, quad.Subject, "Foo") a.tagger.Add("foo") - b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") it1, err := intersect(a, b) if err != nil { t.Error(err) } it2, err := hasa(it1, quad.Object) + i2 := it2.(*SQLNodeIterator) if err != nil { t.Error(err) } - s, v := it2.buildQuery(false, nil) + s, v := i2.buildSQL(true, nil) fmt.Println(s, v) } func TestBuildLinksTo(t *testing.T) { - a := NewStatementIterator(nil, quad.Subject, "Foo") - b := NewStatementIterator(nil, quad.Predicate, "is_equivalent_to") + a := NewSQLLinkIterator(nil, quad.Subject, "Foo") + b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") it1, err := intersect(a, b) if err != nil { t.Error(err) } it2, err := hasa(it1, quad.Object) - it2.tagger.Add("foo") + it2.Tagger().Add("foo") if err != nil { t.Error(err) } @@ -65,7 +67,8 @@ func TestBuildLinksTo(t *testing.T) { if err != nil { t.Error(err) } - s, v := it3.buildQuery(false, nil) + i3 := it3.(*SQLLinkIterator) + s, v := i3.buildSQL(true, nil) 
fmt.Println(s, v) } @@ -77,8 +80,8 @@ func TestInterestingQuery(t *testing.T) { if err != nil { t.Fatal(err) } - a := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") - b := NewStatementIterator(db.(*QuadStore), quad.Predicate, "name") + a := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + b := NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "name") it1, err := intersect(a, b) if err != nil { t.Error(err) @@ -92,7 +95,7 @@ func TestInterestingQuery(t *testing.T) { if err != nil { t.Error(err) } - b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor") + b = NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor") it4, err := intersect(it3, b) if err != nil { t.Error(err) @@ -105,7 +108,7 @@ func TestInterestingQuery(t *testing.T) { if err != nil { t.Error(err) } - b = NewStatementIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring") + b = NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring") it7, err := intersect(it6, b) if err != nil { t.Error(err) @@ -114,13 +117,14 @@ func TestInterestingQuery(t *testing.T) { if err != nil { t.Error(err) } - s, v := it8.buildQuery(false, nil) - it8.Tagger().Add("id") + finalIt := it8.(*SQLNodeIterator) + s, v := finalIt.buildSQL(true, nil) + finalIt.Tagger().Add("id") fmt.Println(s, v) - for graph.Next(it8) { - fmt.Println(it8.Result()) + for graph.Next(finalIt) { + fmt.Println(finalIt.Result()) out := make(map[string]graph.Value) - it8.TagResults(out) + finalIt.TagResults(out) for k, v := range out { fmt.Printf("%s: %v\n", k, v.(string)) } diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 0cad60d..3181f2b 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -200,7 +200,7 @@ func (qs *QuadStore) Quad(val graph.Value) quad.Quad { } func (qs *QuadStore) QuadIterator(d quad.Direction, val graph.Value) graph.Iterator { - return NewStatementIterator(qs, d, val.(string)) + 
return NewSQLLinkIterator(qs, d, val.(string)) } func (qs *QuadStore) NodesAllIterator() graph.Iterator { diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go new file mode 100644 index 0000000..5f913d3 --- /dev/null +++ b/graph/sql/sql_link_iterator.go @@ -0,0 +1,525 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "database/sql" + "fmt" + "strings" + "sync/atomic" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +var sqlLinkType graph.Type +var sqlTableID uint64 + +func init() { + sqlLinkType = graph.RegisterIterator("sqllink") + sqlNodeType = graph.RegisterIterator("sqlnode") + atomic.StoreUint64(&sqlTableID, 0) +} + +func newTableName() string { + id := atomic.AddUint64(&sqlTableID, 1) + return fmt.Sprintf("t_%d", id) +} + +type constraint struct { + dir quad.Direction + vals []string +} + +type tagDir struct { + tag string + dir quad.Direction + + // Not to be stored in the iterator directly + table string +} + +type sqlItDir struct { + dir quad.Direction + it sqlIterator +} + +type sqlIterator interface { + sqlClone() sqlIterator + getTables() []string + getTags() []tagDir + buildWhere() (string, []string) + tableID() tagDir + height() int +} + +type SQLLinkIterator struct { + uid uint64 + qs *QuadStore + tagger graph.Tagger + err error + 
next bool + + cursor *sql.Rows + nodeIts []sqlItDir + constraints []constraint + tableName string + size int64 + + result map[string]string + resultIndex int + resultList [][]string + resultNext [][]string + cols []string + resultQuad quad.Quad +} + +func NewSQLLinkIterator(qs *QuadStore, d quad.Direction, val string) *SQLLinkIterator { + l := &SQLLinkIterator{ + uid: iterator.NextUID(), + qs: qs, + constraints: []constraint{ + constraint{ + dir: d, + vals: []string{val}, + }, + }, + tableName: newTableName(), + size: 0, + } + return l +} + +func (l *SQLLinkIterator) sqlClone() sqlIterator { + return l.Clone().(*SQLLinkIterator) +} + +func (l *SQLLinkIterator) Clone() graph.Iterator { + m := &SQLLinkIterator{ + uid: iterator.NextUID(), + qs: l.qs, + tableName: l.tableName, + size: l.size, + } + for _, i := range l.nodeIts { + m.nodeIts = append(m.nodeIts, sqlItDir{ + dir: i.dir, + it: i.it.sqlClone(), + }) + } + m.constraints = l.constraints[:] + m.tagger.CopyFrom(l) + return m +} + +func (l *SQLLinkIterator) UID() uint64 { + return l.uid +} + +func (l *SQLLinkIterator) Reset() { + l.err = nil + l.Close() +} + +func (l *SQLLinkIterator) Err() error { + return l.err +} + +func (l *SQLLinkIterator) Close() error { + if l.cursor != nil { + err := l.cursor.Close() + if err != nil { + return err + } + l.cursor = nil + } + return nil +} + +func (l *SQLLinkIterator) Tagger() *graph.Tagger { + return &l.tagger +} + +func (l *SQLLinkIterator) Result() graph.Value { + return l.resultQuad +} + +func (l *SQLLinkIterator) TagResults(dst map[string]graph.Value) { + for tag, value := range l.result { + if tag == "__execd" { + for _, tag := range l.tagger.Tags() { + dst[tag] = value + } + continue + } + dst[tag] = value + } + + for tag, value := range l.tagger.Fixed() { + dst[tag] = value + } +} + +func (l *SQLLinkIterator) SubIterators() []graph.Iterator { + // TODO(barakmich): SQL Subiterators shouldn't count? If it makes sense, + // there's no reason not to expose them though. 
+ return nil +} + +func (l *SQLLinkIterator) Sorted() bool { return false } +func (l *SQLLinkIterator) Optimize() (graph.Iterator, bool) { return l, false } + +func (l *SQLLinkIterator) Size() (int64, bool) { + if l.size != 0 { + return l.size, true + } + if len(l.constraints) > 0 { + l.size = l.qs.sizeForIterator(false, l.constraints[0].dir, l.constraints[0].vals[0]) + } else { + return l.qs.Size(), false + } + return l.size, true +} + +func (l *SQLLinkIterator) Describe() graph.Description { + size, _ := l.Size() + return graph.Description{ + UID: l.UID(), + Name: fmt.Sprintf("SQL_LINK_QUERY: %#v", l), + Type: l.Type(), + Size: size, + } +} + +func (l *SQLLinkIterator) Stats() graph.IteratorStats { + size, _ := l.Size() + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 5, + Size: size, + } +} + +func (l *SQLLinkIterator) Type() graph.Type { + return sqlLinkType +} + +func (l *SQLLinkIterator) Contains(v graph.Value) bool { + var err error + //if it.preFilter(v) { + //return false + //} + err = l.makeCursor(false, v) + if err != nil { + glog.Errorf("Couldn't make query: %v", err) + l.err = err + l.cursor.Close() + return false + } + l.cols, err = l.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + l.err = err + l.cursor.Close() + return false + } + l.resultList = nil + for { + if !l.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := l.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + l.err = err + } + l.cursor.Close() + break + } + s, err := scan(l.cursor, len(l.cols)) + if err != nil { + l.err = err + l.cursor.Close() + return false + } + l.resultList = append(l.resultList, s) + } + l.cursor.Close() + l.cursor = nil + if len(l.resultList) != 0 { + l.resultIndex = 0 + l.buildResult(0) + return true + } + return false +} + +func (l *SQLLinkIterator) NextPath() bool { + l.resultIndex += 1 + if l.resultIndex >= len(l.resultList) { + return false + } + l.buildResult(l.resultIndex) + return true 
+} + +func (l *SQLLinkIterator) buildResult(i int) { + container := l.resultList[i] + var q quad.Quad + q.Subject = container[0] + q.Predicate = container[1] + q.Object = container[2] + q.Label = container[3] + l.resultQuad = q + l.result = make(map[string]string) + for i, c := range l.cols[4:] { + l.result[c] = container[i+4] + } +} + +func (l *SQLLinkIterator) getTables() []string { + out := []string{l.tableName} + //for _, i := range l.nodeIts { + //out = append(out, i.it.getTables()...) + //} + return out +} + +func (l *SQLLinkIterator) height() int { + v := 0 + for _, i := range l.nodeIts { + if i.it.height() > v { + v = i.it.height() + } + } + return v + 1 +} + +func (l *SQLLinkIterator) getTags() []tagDir { + var out []tagDir + for _, tag := range l.tagger.Tags() { + out = append(out, tagDir{ + dir: quad.Any, + table: l.tableName, + tag: tag, + }) + } + //for _, i := range l.nodeIts { + //out = append(out, i.it.getTags()...) + //} + return out +} + +func (l *SQLLinkIterator) buildWhere() (string, []string) { + var q []string + var vals []string + for _, c := range l.constraints { + q = append(q, fmt.Sprintf("%s.%s = ?", l.tableName, c.dir)) + vals = append(vals, c.vals[0]) + } + for _, i := range l.nodeIts { + sni := i.it.(*SQLNodeIterator) + sql, s := sni.buildSQL(true, nil) + q = append(q, fmt.Sprintf("%s.%s in (%s)", l.tableName, i.dir, sql[:len(sql)-1])) + vals = append(vals, s...) + //q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, t.dir)) + } + //for _, i := range l.nodeIts { + //s, v := i.it.buildWhere() + //q = append(q, s) + //vals = append(vals, v...) 
+ //} + query := strings.Join(q, " AND ") + return query, vals +} + +func (l *SQLLinkIterator) tableID() tagDir { + return tagDir{ + dir: quad.Any, + table: l.tableName, + } +} + +func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) { + query := "SELECT " + t := []string{ + fmt.Sprintf("%s.subject", l.tableName), + fmt.Sprintf("%s.predicate", l.tableName), + fmt.Sprintf("%s.object", l.tableName), + fmt.Sprintf("%s.label", l.tableName), + } + for _, v := range l.getTags() { + t = append(t, fmt.Sprintf("%s.%s as %s", v.table, v.dir, v.tag)) + } + query += strings.Join(t, ", ") + query += " FROM " + t = []string{} + for _, k := range l.getTables() { + t = append(t, fmt.Sprintf("quads as %s", k)) + } + query += strings.Join(t, ", ") + query += " WHERE " + l.next = next + constraint, values := l.buildWhere() + + if !next { + v := val.(quad.Quad) + if constraint != "" { + constraint += " AND " + } + t = []string{ + fmt.Sprintf("%s.subject = ?", l.tableName), + fmt.Sprintf("%s.predicate = ?", l.tableName), + fmt.Sprintf("%s.object = ?", l.tableName), + fmt.Sprintf("%s.label = ?", l.tableName), + } + constraint += strings.Join(t, " AND ") + values = append(values, v.Subject) + values = append(values, v.Predicate) + values = append(values, v.Object) + values = append(values, v.Label) + } + query += constraint + query += ";" + + glog.V(2).Infoln(query) + + if glog.V(4) { + dstr := query + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, "?", fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } + return query, values +} + +func convertToPostgres(query string, values []string) string { + for i := 1; i <= len(values); i++ { + query = strings.Replace(query, "?", fmt.Sprintf("$%d", i), 1) + } + return query +} + +func (l *SQLLinkIterator) makeCursor(next bool, value graph.Value) error { + if l.cursor != nil { + l.cursor.Close() + } + var q string + var values []string + q, values = l.buildSQL(next, value) + q = 
convertToPostgres(q, values) + ivalues := make([]interface{}, 0, len(values)) + for _, v := range values { + ivalues = append(ivalues, v) + } + cursor, err := l.qs.db.Query(q, ivalues...) + if err != nil { + glog.Errorf("Couldn't get cursor from SQL database: %v", err) + cursor = nil + return err + } + l.cursor = cursor + return nil +} + +func scan(cursor *sql.Rows, nCols int) ([]string, error) { + pointers := make([]interface{}, nCols) + container := make([]string, nCols) + for i, _ := range pointers { + pointers[i] = &container[i] + } + err := cursor.Scan(pointers...) + if err != nil { + glog.Errorf("Error scanning iterator: %v", err) + return nil, err + } + return container, nil +} + +func (l *SQLLinkIterator) Next() bool { + var err error + graph.NextLogIn(l) + if l.cursor == nil { + err = l.makeCursor(true, nil) + l.cols, err = l.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + l.err = err + l.cursor.Close() + return false + } + // iterate the first one + if !l.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := l.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + l.err = err + } + l.cursor.Close() + return false + } + s, err := scan(l.cursor, len(l.cols)) + if err != nil { + l.err = err + l.cursor.Close() + return false + } + l.resultNext = append(l.resultNext, s) + } + if l.resultList != nil && l.resultNext == nil { + // We're on something and there's no next + return false + } + l.resultList = l.resultNext + l.resultNext = nil + l.resultIndex = 0 + for { + if !l.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := l.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + l.err = err + } + l.cursor.Close() + break + } + s, err := scan(l.cursor, len(l.cols)) + if err != nil { + l.err = err + l.cursor.Close() + return false + } + if l.resultList[0][0] == s[0] && l.resultList[0][1] == s[1] && l.resultList[0][2] == s[2] && l.resultList[0][3] == s[3] { + l.resultList = 
append(l.resultList, s) + } else { + l.resultNext = append(l.resultNext, s) + break + } + + } + if len(l.resultList) == 0 { + return graph.NextLogOut(l, nil, false) + } + l.buildResult(0) + return graph.NextLogOut(l, l.Result(), true) +} + +type SQLAllIterator struct { + // TBD +} diff --git a/graph/sql/sql_link_iterator_test.go b/graph/sql/sql_link_iterator_test.go new file mode 100644 index 0000000..b13e389 --- /dev/null +++ b/graph/sql/sql_link_iterator_test.go @@ -0,0 +1,87 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sql + +import ( + "fmt" + "testing" + + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +func TestSQLLink(t *testing.T) { + it := NewSQLLinkIterator(nil, quad.Object, "cool") + s, v := it.buildSQL(true, nil) + fmt.Println(s, v) +} + +func TestSQLLinkIteration(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + it := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + for graph.Next(it) { + fmt.Println(it.Result()) + } + it = NewSQLLinkIterator(db.(*QuadStore), quad.Subject, "/en/casablanca_1942") + s, v := it.buildSQL(true, nil) + fmt.Println(s, v) + c := 0 + for graph.Next(it) { + fmt.Println(it.Result()) + c += 1 + } + if c != 18 { + t.Errorf("Not enough results, got %d expected 18", c) + } +} + +func TestSQLNodeIteration(t *testing.T) { + if *dbpath == "" { + t.SkipNow() + } + db, err := newQuadStore(*dbpath, nil) + if err != nil { + t.Fatal(err) + } + link := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "/en/humphrey_bogart") + it := &SQLNodeIterator{ + uid: iterator.NextUID(), + qs: db.(*QuadStore), + tableName: newTableName(), + linkIts: []sqlItDir{ + sqlItDir{it: link, + dir: quad.Subject, + }, + }, + } + s, v := it.buildSQL(true, nil) + fmt.Println(s, v) + c := 0 + for graph.Next(it) { + fmt.Println(it.Result()) + c += 1 + } + if c != 56 { + t.Errorf("Not enough results, got %d expected 56", c) + } + +} diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go new file mode 100644 index 0000000..e276399 --- /dev/null +++ b/graph/sql/sql_node_iterator.go @@ -0,0 +1,435 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "database/sql" + "fmt" + "strings" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +var sqlNodeType graph.Type + +func init() { + sqlNodeType = graph.RegisterIterator("sqlnode") +} + +type SQLNodeIterator struct { + uid uint64 + qs *QuadStore + tagger graph.Tagger + tableName string + err error + + cursor *sql.Rows + linkIts []sqlItDir + size int64 + tagdirs []tagDir + + result map[string]string + resultIndex int + resultList [][]string + resultNext [][]string + cols []string +} + +func (n *SQLNodeIterator) sqlClone() sqlIterator { + return n.Clone().(*SQLNodeIterator) +} + +func (n *SQLNodeIterator) Clone() graph.Iterator { + m := &SQLNodeIterator{ + uid: iterator.NextUID(), + qs: n.qs, + size: n.size, + tableName: n.tableName, + } + for _, i := range n.linkIts { + m.linkIts = append(m.linkIts, sqlItDir{ + dir: i.dir, + it: i.it.sqlClone(), + }) + } + copy(n.tagdirs, m.tagdirs) + m.tagger.CopyFrom(n) + return m +} + +func (n *SQLNodeIterator) UID() uint64 { + return n.uid +} + +func (n *SQLNodeIterator) Reset() { + n.err = nil + n.Close() +} + +func (n *SQLNodeIterator) Err() error { + return n.err +} + +func (n *SQLNodeIterator) Close() error { + if n.cursor != nil { + err := n.cursor.Close() + if err != nil { + return err + } + n.cursor = nil + } + return nil +} + +func (n *SQLNodeIterator) Tagger() *graph.Tagger { + return &n.tagger +} + +func (n *SQLNodeIterator) Result() graph.Value { + return n.result["__execd"] 
+} + +func (n *SQLNodeIterator) TagResults(dst map[string]graph.Value) { + for tag, value := range n.result { + if tag == "__execd" { + for _, tag := range n.tagger.Tags() { + dst[tag] = value + } + continue + } + dst[tag] = value + } + + for tag, value := range n.tagger.Fixed() { + dst[tag] = value + } +} + +func (n *SQLNodeIterator) Type() graph.Type { + return sqlNodeType +} + +func (n *SQLNodeIterator) SubIterators() []graph.Iterator { + // TODO(barakmich): SQL Subiterators shouldn't count? If it makes sense, + // there's no reason not to expose them though. + return nil +} + +func (n *SQLNodeIterator) Sorted() bool { return false } +func (n *SQLNodeIterator) Optimize() (graph.Iterator, bool) { return n, false } + +func (n *SQLNodeIterator) Size() (int64, bool) { + return n.qs.Size() / int64(len(n.linkIts)+1), true +} + +func (n *SQLNodeIterator) Describe() graph.Description { + size, _ := n.Size() + return graph.Description{ + UID: n.UID(), + Name: fmt.Sprintf("SQL_NODE_QUERY: %#v", n), + Type: n.Type(), + Size: size, + } +} + +func (n *SQLNodeIterator) Stats() graph.IteratorStats { + size, _ := n.Size() + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 5, + Size: size, + } +} + +func (n *SQLNodeIterator) NextPath() bool { + n.resultIndex += 1 + if n.resultIndex >= len(n.resultList) { + return false + } + n.buildResult(n.resultIndex) + return true +} + +func (n *SQLNodeIterator) buildResult(i int) { + container := n.resultList[i] + n.result = make(map[string]string) + for i, c := range n.cols { + n.result[c] = container[i] + } +} + +func (n *SQLNodeIterator) getTables() []string { + var out []string + for _, i := range n.linkIts { + out = append(out, i.it.getTables()...) 
+ } + if len(out) == 0 { + out = append(out, n.tableName) + } + return out +} + +func (n *SQLNodeIterator) tableID() tagDir { + if len(n.linkIts) == 0 { + return tagDir{ + table: n.tableName, + dir: quad.Any, + } + } + return tagDir{ + table: n.linkIts[0].it.tableID().table, + dir: n.linkIts[0].dir, + } +} + +func (n *SQLNodeIterator) getTags() []tagDir { + myTag := n.tableID() + var out []tagDir + for _, tag := range n.tagger.Tags() { + out = append(out, tagDir{ + dir: myTag.dir, + table: myTag.table, + tag: tag, + }) + } + for _, tag := range n.tagdirs { + out = append(out, tagDir{ + dir: tag.dir, + table: myTag.table, + tag: tag.tag, + }) + + } + for _, i := range n.linkIts { + out = append(out, i.it.getTags()...) + } + return out +} + +func (n *SQLNodeIterator) height() int { + v := 0 + for _, i := range n.linkIts { + if i.it.height() > v { + v = i.it.height() + } + } + return v + 1 +} + +func (n *SQLNodeIterator) buildWhere() (string, []string) { + var q []string + var vals []string + if len(n.linkIts) > 1 { + baseTable := n.linkIts[0].it.tableID().table + baseDir := n.linkIts[0].dir + for _, i := range n.linkIts[1:] { + table := i.it.tableID().table + dir := i.dir + q = append(q, fmt.Sprintf("%s.%s = %s.%s", baseTable, baseDir, table, dir)) + } + } + for _, i := range n.linkIts { + s, v := i.it.buildWhere() + q = append(q, s) + vals = append(vals, v...) 
+ } + query := strings.Join(q, " AND ") + return query, vals +} + +func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) { + topData := n.tableID() + query := "SELECT " + var t []string + t = append(t, fmt.Sprintf("%s.%s as __execd", topData.table, topData.dir)) + for _, v := range n.getTags() { + t = append(t, fmt.Sprintf("%s.%s as %s", v.table, v.dir, v.tag)) + } + query += strings.Join(t, ", ") + query += " FROM " + t = []string{} + for _, k := range n.getTables() { + t = append(t, fmt.Sprintf("quads as %s", k)) + } + query += strings.Join(t, ", ") + query += " WHERE " + constraint, values := n.buildWhere() + + if !next { + v := val.(string) + if constraint != "" { + constraint += " AND " + } + constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir) + values = append(values, v) + } + query += constraint + query += ";" + + glog.V(2).Infoln(query) + + if glog.V(4) { + dstr := query + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, "?", fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } + return query, values +} + +func (n *SQLNodeIterator) Next() bool { + var err error + graph.NextLogIn(n) + if n.cursor == nil { + err = n.makeCursor(true, nil) + n.cols, err = n.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + n.err = err + n.cursor.Close() + return false + } + // iterate the first one + if !n.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := n.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + n.err = err + } + n.cursor.Close() + return false + } + s, err := scan(n.cursor, len(n.cols)) + if err != nil { + n.err = err + n.cursor.Close() + return false + } + n.resultNext = append(n.resultNext, s) + } + if n.resultList != nil && n.resultNext == nil { + // We're on something and there's no next + return false + } + n.resultList = n.resultNext + n.resultNext = nil + n.resultIndex = 0 + for { + if !n.cursor.Next() { + 
glog.V(4).Infoln("sql: No next") + err := n.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + n.err = err + } + n.cursor.Close() + break + } + s, err := scan(n.cursor, len(n.cols)) + if err != nil { + n.err = err + n.cursor.Close() + return false + } + if n.resultList[0][0] != s[0] { + n.resultNext = append(n.resultNext, s) + break + } else { + n.resultList = append(n.resultList, s) + } + + } + if len(n.resultList) == 0 { + return graph.NextLogOut(n, nil, false) + } + n.buildResult(0) + return graph.NextLogOut(n, n.Result(), true) +} + +func (n *SQLNodeIterator) makeCursor(next bool, value graph.Value) error { + if n.cursor != nil { + n.cursor.Close() + } + var q string + var values []string + q, values = n.buildSQL(next, value) + q = convertToPostgres(q, values) + ivalues := make([]interface{}, 0, len(values)) + for _, v := range values { + ivalues = append(ivalues, v) + } + cursor, err := n.qs.db.Query(q, ivalues...) + if err != nil { + glog.Errorf("Couldn't get cursor from SQL database: %v", err) + cursor = nil + return err + } + n.cursor = cursor + return nil +} + +func (n *SQLNodeIterator) Contains(v graph.Value) bool { + var err error + //if it.preFilter(v) { + //return false + //} + err = n.makeCursor(false, v) + if err != nil { + glog.Errorf("Couldn't make query: %v", err) + n.err = err + n.cursor.Close() + return false + } + n.cols, err = n.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + n.err = err + n.cursor.Close() + return false + } + n.resultList = nil + for { + if !n.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := n.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + n.err = err + } + n.cursor.Close() + break + } + s, err := scan(n.cursor, len(n.cols)) + if err != nil { + n.err = err + n.cursor.Close() + return false + } + n.resultList = append(n.resultList, s) + } + n.cursor.Close() + n.cursor = nil + if len(n.resultList) != 0 { + n.resultIndex = 0 + 
n.buildResult(0) + return true + } + return false +} diff --git a/integration/integration_test.go b/integration/integration_test.go index 05ddac0..76f4178 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -51,11 +51,14 @@ var benchmarkQueries = []struct { long bool query string tag string - expect []interface{} + // for testing + skip bool + expect []interface{} }{ // Easy one to get us started. How quick is the most straightforward retrieval? { message: "name predicate", + skip: true, query: ` g.V("Humphrey Bogart").In("name").All() `, @@ -69,6 +72,7 @@ var benchmarkQueries = []struct { // that's going to be measurably slower for every other backend. { message: "two large sets with no intersection", + skip: true, query: ` function getId(x) { return g.V(x).In("name") } var actor_to_film = g.M().In("/film/performance/actor").In("/film/film/starring") @@ -534,6 +538,9 @@ func checkQueries(t *testing.T) { if testing.Short() && test.long { continue } + if test.skip { + continue + } fmt.Printf("Now testing %s\n", test.message) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) _, err := ses.Parse(test.query) From 13d4d8b7b49a853853e6ca21768c4f9b457995d0 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Mon, 27 Jul 2015 13:48:41 -0400 Subject: [PATCH 06/18] revert to non-subquery mode --- graph/sql/sql_link_iterator.go | 52 +++++++++++++++--------------------------- graph/sql/sql_node_iterator.go | 10 -------- 2 files changed, 18 insertions(+), 44 deletions(-) diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index 5f913d3..1986df1 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -31,7 +31,6 @@ var sqlTableID uint64 func init() { sqlLinkType = graph.RegisterIterator("sqllink") - sqlNodeType = graph.RegisterIterator("sqlnode") atomic.StoreUint64(&sqlTableID, 0) } @@ -64,7 +63,6 @@ type sqlIterator interface { getTags() []tagDir buildWhere() (string, []string) 
tableID() tagDir - height() int } type SQLLinkIterator struct { @@ -72,7 +70,6 @@ type SQLLinkIterator struct { qs *QuadStore tagger graph.Tagger err error - next bool cursor *sql.Rows nodeIts []sqlItDir @@ -110,10 +107,11 @@ func (l *SQLLinkIterator) sqlClone() sqlIterator { func (l *SQLLinkIterator) Clone() graph.Iterator { m := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: l.qs, - tableName: l.tableName, - size: l.size, + uid: iterator.NextUID(), + qs: l.qs, + tableName: l.tableName, + size: l.size, + constraints: make([]constraint, 0, len(l.constraints)), } for _, i := range l.nodeIts { m.nodeIts = append(m.nodeIts, sqlItDir{ @@ -121,7 +119,7 @@ func (l *SQLLinkIterator) Clone() graph.Iterator { it: i.it.sqlClone(), }) } - m.constraints = l.constraints[:] + copy(m.constraints, l.constraints) m.tagger.CopyFrom(l) return m } @@ -292,20 +290,10 @@ func (l *SQLLinkIterator) buildResult(i int) { func (l *SQLLinkIterator) getTables() []string { out := []string{l.tableName} - //for _, i := range l.nodeIts { - //out = append(out, i.it.getTables()...) - //} - return out -} - -func (l *SQLLinkIterator) height() int { - v := 0 for _, i := range l.nodeIts { - if i.it.height() > v { - v = i.it.height() - } + out = append(out, i.it.getTables()...) } - return v + 1 + return out } func (l *SQLLinkIterator) getTags() []tagDir { @@ -317,9 +305,9 @@ func (l *SQLLinkIterator) getTags() []tagDir { tag: tag, }) } - //for _, i := range l.nodeIts { - //out = append(out, i.it.getTags()...) - //} + for _, i := range l.nodeIts { + out = append(out, i.it.getTags()...) + } return out } @@ -331,17 +319,14 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) { vals = append(vals, c.vals[0]) } for _, i := range l.nodeIts { - sni := i.it.(*SQLNodeIterator) - sql, s := sni.buildSQL(true, nil) - q = append(q, fmt.Sprintf("%s.%s in (%s)", l.tableName, i.dir, sql[:len(sql)-1])) - vals = append(vals, s...) 
- //q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, t.dir)) + t := i.it.tableID() + q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, t.dir)) + } + for _, i := range l.nodeIts { + s, v := i.it.buildWhere() + q = append(q, s) + vals = append(vals, v...) } - //for _, i := range l.nodeIts { - //s, v := i.it.buildWhere() - //q = append(q, s) - //vals = append(vals, v...) - //} query := strings.Join(q, " AND ") return query, vals } @@ -372,7 +357,6 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string } query += strings.Join(t, ", ") query += " WHERE " - l.next = next constraint, values := l.buildWhere() if !next { diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index e276399..4f18382 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -221,16 +221,6 @@ func (n *SQLNodeIterator) getTags() []tagDir { return out } -func (n *SQLNodeIterator) height() int { - v := 0 - for _, i := range n.linkIts { - if i.it.height() > v { - v = i.it.height() - } - } - return v + 1 -} - func (n *SQLNodeIterator) buildWhere() (string, []string) { var q []string var vals []string From 425292811b0c22d10c2eab7d8f8f615bd3e102a9 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Mon, 27 Jul 2015 16:53:34 -0400 Subject: [PATCH 07/18] First reasonably fast integration test --- graph/sql/builder_iterator.go | 643 ------------------------------------ graph/sql/builder_iterator_test.go | 110 ------ graph/sql/optimizers.go | 37 +-- graph/sql/sql_link_iterator.go | 89 ++++- graph/sql/sql_link_iterator_test.go | 3 + graph/sql/sql_node_iterator.go | 153 ++++++--- integration/integration_test.go | 7 +- 7 files changed, 209 insertions(+), 833 deletions(-) delete mode 100644 graph/sql/builder_iterator.go delete mode 100644 graph/sql/builder_iterator_test.go diff --git a/graph/sql/builder_iterator.go b/graph/sql/builder_iterator.go deleted file mode 100644 index 
867789d..0000000 --- a/graph/sql/builder_iterator.go +++ /dev/null @@ -1,643 +0,0 @@ -// Copyright 2015 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sql - -import ( - "database/sql" - "fmt" - "strings" - - "github.com/barakmich/glog" - "github.com/google/cayley/graph" - "github.com/google/cayley/graph/iterator" - "github.com/google/cayley/quad" -) - -var sqlBuilderType graph.Type - -func init() { - sqlBuilderType = graph.RegisterIterator("sqlbuilder") -} - -type tableDir struct { - table string - dir quad.Direction -} - -func (td tableDir) String() string { - if td.table != "" { - return fmt.Sprintf("%s.%s", td.table, td.dir) - } - return "ERR" -} - -type clause interface { - toSQL() (string, []string) - getTables() map[string]bool - size() int -} - -type baseClause struct { - pair tableDir - strTarget []string - target tableDir -} - -func (b baseClause) toSQL() (string, []string) { - if len(b.strTarget) > 1 { - // TODO(barakmich): Sets of things, IN clause - return "", []string{} - } - if len(b.strTarget) == 0 { - return fmt.Sprintf("%s = %s", b.pair, b.target), []string{} - } - return fmt.Sprintf("%s = ?", b.pair), []string{b.strTarget[0]} -} - -func (b baseClause) size() int { return 1 } - -func (b baseClause) getTables() map[string]bool { - out := make(map[string]bool) - if b.pair.table != "" { - out[b.pair.table] = true - } - if b.target.table != "" { - out[b.target.table] = true - } - return out 
-} - -type joinClause struct { - left clause - right clause - op clauseOp -} - -func (jc joinClause) size() int { - size := 0 - if jc.left != nil { - size += jc.left.size() - } - if jc.right != nil { - size += jc.right.size() - } - return size -} - -func (jc joinClause) toSQL() (string, []string) { - if jc.left == nil { - if jc.right == nil { - return "", []string{} - } - return jc.right.toSQL() - } - if jc.right == nil { - return jc.left.toSQL() - } - l, lstr := jc.left.toSQL() - r, rstr := jc.right.toSQL() - lstr = append(lstr, rstr...) - var op string - switch jc.op { - case andClause: - op = "AND" - case orClause: - op = "OR" - } - return fmt.Sprintf("(%s %s %s)", l, op, r), lstr -} - -func (jc joinClause) getTables() map[string]bool { - var m map[string]bool - if jc.left != nil { - m = jc.left.getTables() - } else { - m = make(map[string]bool) - } - if jc.right != nil { - for k, _ := range jc.right.getTables() { - m[k] = true - } - } - return m -} - -type tag struct { - pair tableDir - t string -} - -type statementType int - -const ( - node statementType = iota - link -) - -type clauseOp int - -const ( - andClause clauseOp = iota - orClause -) - -func (it *StatementIterator) canonicalizeWhere() (string, []string) { - var out []string - var values []string - for _, b := range it.buildWhere { - b.pair.table = it.tableName() - s, v := b.toSQL() - values = append(values, v...) 
- out = append(out, s) - } - return strings.Join(out, " AND "), values -} - -func (it *StatementIterator) getTables() map[string]bool { - m := make(map[string]bool) - if it.where != nil { - m = it.where.getTables() - } - for _, t := range it.tags { - if t.pair.table != "" { - m[t.pair.table] = true - } - } - return m -} - -func (it *StatementIterator) tableName() string { - return fmt.Sprintf("t_%d", it.uid) -} - -func (it *StatementIterator) buildQuery(contains bool, v graph.Value) (string, []string) { - str := "SELECT " - var t []string - if it.stType == link { - t = []string{ - fmt.Sprintf("%s.subject", it.tableName()), - fmt.Sprintf("%s.predicate", it.tableName()), - fmt.Sprintf("%s.object", it.tableName()), - fmt.Sprintf("%s.label", it.tableName()), - } - } else { - t = []string{fmt.Sprintf("%s.%s as __execd", it.tableName(), it.dir)} - } - for _, v := range it.tags { - if v.pair.table == "" { - v.pair.table = it.tableName() - } - t = append(t, fmt.Sprintf("%s as %s", v.pair, v.t)) - } - for _, v := range it.tagger.Tags() { - t = append(t, fmt.Sprintf("%s as %s", tableDir{it.tableName(), it.dir}, v)) - } - str += strings.Join(t, ", ") - str += " FROM " - t = []string{fmt.Sprintf("quads as %s", it.tableName())} - for k, _ := range it.getTables() { - if k != it.tableName() { - t = append(t, fmt.Sprintf("quads as %s", k)) - } - } - str += strings.Join(t, ", ") - str += " WHERE " - var values []string - var s string - if len(it.buildWhere) != 0 { - s, values = it.canonicalizeWhere() - } - if it.where != nil { - if s != "" { - s += " AND " - } - where, v2 := it.where.toSQL() - s += where - values = append(values, v2...) 
- } - - if contains { - if s != "" { - s += " AND " - } - if it.stType == link { - q := v.(quad.Quad) - t = []string{ - fmt.Sprintf("%s.subject = ?", it.tableName()), - fmt.Sprintf("%s.predicate = ?", it.tableName()), - fmt.Sprintf("%s.object = ?", it.tableName()), - fmt.Sprintf("%s.label = ?", it.tableName()), - } - s += " " + strings.Join(t, " AND ") + " " - values = append(values, q.Subject) - values = append(values, q.Predicate) - values = append(values, q.Object) - values = append(values, q.Label) - } else { - s += fmt.Sprintf("%s.%s = ? ", it.tableName(), it.dir) - values = append(values, v.(string)) - } - - } - str += s - if it.stType == node { - str += " ORDER BY __execd " - } - str += ";" - for i := 1; i <= len(values); i++ { - str = strings.Replace(str, "?", fmt.Sprintf("$%d", i), 1) - } - glog.V(2).Infoln(str) - if glog.V(4) { - dstr := str - for i := 1; i <= len(values); i++ { - dstr = strings.Replace(dstr, fmt.Sprintf("$%d", i), fmt.Sprintf("'%s'", values[i-1]), 1) - } - glog.V(4).Infoln(dstr) - } - return str, values -} - -type StatementIterator struct { - uid uint64 - qs *QuadStore - - // Only for links - buildWhere []baseClause - - where clause - tagger graph.Tagger - tags []tag - err error - cursor *sql.Rows - stType statementType - dir quad.Direction - result map[string]string - resultIndex int - resultList [][]string - resultNext [][]string - cols []string - resultQuad quad.Quad - size int64 -} - -func (it *StatementIterator) Clone() graph.Iterator { - m := &StatementIterator{ - uid: iterator.NextUID(), - qs: it.qs, - buildWhere: it.buildWhere, - where: it.where, - stType: it.stType, - size: it.size, - dir: it.dir, - } - copy(it.tags, m.tags) - m.tagger.CopyFrom(it) - return m -} - -func NewStatementIterator(qs *QuadStore, d quad.Direction, val string) *StatementIterator { - it := &StatementIterator{ - uid: iterator.NextUID(), - qs: qs, - buildWhere: []baseClause{ - baseClause{ - pair: tableDir{"", d}, - strTarget: []string{val}, - }, - }, - 
stType: link, - size: -1, - } - return it -} - -func (it *StatementIterator) UID() uint64 { - return it.uid -} - -func (it *StatementIterator) Reset() { - it.err = nil - it.Close() -} - -func (it *StatementIterator) Err() error { - return it.err -} - -func (it *StatementIterator) Close() error { - if it.cursor != nil { - err := it.cursor.Close() - if err != nil { - return err - } - it.cursor = nil - } - return nil -} - -func (it *StatementIterator) Tagger() *graph.Tagger { - return &it.tagger -} - -func (it *StatementIterator) Result() graph.Value { - if it.stType == node { - return it.result["__execd"] - } - return it.resultQuad -} - -func (it *StatementIterator) TagResults(dst map[string]graph.Value) { - for tag, value := range it.result { - if tag == "__execd" { - for _, tag := range it.tagger.Tags() { - dst[tag] = value - } - continue - } - dst[tag] = value - } - - for tag, value := range it.tagger.Fixed() { - dst[tag] = value - } -} - -func (it *StatementIterator) Type() graph.Type { - return sqlBuilderType -} - -func (it *StatementIterator) preFilter(v graph.Value) bool { - if it.stType == link { - q := v.(quad.Quad) - for _, b := range it.buildWhere { - if len(b.strTarget) == 0 { - continue - } - canFilter := true - for _, s := range b.strTarget { - if q.Get(b.pair.dir) == s { - canFilter = false - break - } - } - if canFilter { - return true - } - } - } - return false -} - -func (it *StatementIterator) Contains(v graph.Value) bool { - var err error - if it.preFilter(v) { - return false - } - q, values := it.buildQuery(true, v) - ivalues := make([]interface{}, 0, len(values)) - for _, v := range values { - ivalues = append(ivalues, v) - } - it.cursor, err = it.qs.db.Query(q, ivalues...) 
- if err != nil { - glog.Errorf("Couldn't make query: %v", err) - it.err = err - it.cursor.Close() - return false - } - it.cols, err = it.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - it.err = err - it.cursor.Close() - return false - } - it.resultList = nil - for { - if !it.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := it.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - it.err = err - } - it.cursor.Close() - break - } - s, err := it.scan() - if err != nil { - it.err = err - it.cursor.Close() - return false - } - it.resultList = append(it.resultList, s) - } - it.cursor.Close() - it.cursor = nil - if len(it.resultList) != 0 { - it.resultIndex = 0 - it.buildResult(0) - return true - } - return false -} - -func (it *StatementIterator) SubIterators() []graph.Iterator { - return nil -} - -func (it *StatementIterator) Sorted() bool { return false } -func (it *StatementIterator) Optimize() (graph.Iterator, bool) { return it, false } - -func (it *StatementIterator) Size() (int64, bool) { - - if it.size != -1 { - return it.size, true - } - if it.stType == node { - if it.where == nil { - return it.qs.Size() / int64(len(it.buildWhere)+1), true - } - return it.qs.Size() / int64(it.where.size()+len(it.buildWhere)+1), true - } - b := it.buildWhere[0] - if len(b.strTarget) > 0 { - it.size = it.qs.sizeForIterator(false, b.pair.dir, b.strTarget[0]) - } else { - return it.qs.Size(), false - } - return it.size, true -} - -func (it *StatementIterator) Describe() graph.Description { - size, _ := it.Size() - return graph.Description{ - UID: it.UID(), - Name: fmt.Sprintf("SQL_QUERY: %#v", it), - Type: it.Type(), - Size: size, - } -} - -func (it *StatementIterator) Stats() graph.IteratorStats { - size, _ := it.Size() - return graph.IteratorStats{ - ContainsCost: 1, - NextCost: 5, - Size: size, - } -} - -func (it *StatementIterator) makeCursor() { - if it.cursor != nil { - it.cursor.Close() - } - q, values := 
it.buildQuery(false, nil) - ivalues := make([]interface{}, 0, len(values)) - for _, v := range values { - ivalues = append(ivalues, v) - } - cursor, err := it.qs.db.Query(q, ivalues...) - if err != nil { - glog.Errorf("Couldn't get cursor from SQL database: %v", err) - cursor = nil - } - it.cursor = cursor -} - -func (it *StatementIterator) NextPath() bool { - it.resultIndex += 1 - if it.resultIndex >= len(it.resultList) { - return false - } - it.buildResult(it.resultIndex) - return true -} - -func (it *StatementIterator) Next() bool { - var err error - graph.NextLogIn(it) - if it.cursor == nil { - it.makeCursor() - it.cols, err = it.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - it.err = err - it.cursor.Close() - return false - } - // iterate the first one - if !it.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := it.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - it.err = err - } - it.cursor.Close() - return false - } - s, err := it.scan() - if err != nil { - it.err = err - it.cursor.Close() - return false - } - it.resultNext = append(it.resultNext, s) - } - if it.resultList != nil && it.resultNext == nil { - // We're on something and there's no next - return false - } - it.resultList = it.resultNext - it.resultNext = nil - it.resultIndex = 0 - for { - if !it.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := it.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - it.err = err - } - it.cursor.Close() - break - } - s, err := it.scan() - if err != nil { - it.err = err - it.cursor.Close() - return false - } - if it.stType == node { - if it.resultList[0][0] != s[0] { - it.resultNext = append(it.resultNext, s) - break - } else { - it.resultList = append(it.resultList, s) - } - } else { - if it.resultList[0][0] == s[0] && it.resultList[0][1] == s[1] && it.resultList[0][2] == s[2] && it.resultList[0][3] == s[3] { - it.resultList = append(it.resultList, s) - } else { - 
it.resultNext = append(it.resultNext, s) - break - } - } - - } - if len(it.resultList) == 0 { - return graph.NextLogOut(it, nil, false) - } - it.buildResult(0) - return graph.NextLogOut(it, it.Result(), true) -} - -func (it *StatementIterator) scan() ([]string, error) { - pointers := make([]interface{}, len(it.cols)) - container := make([]string, len(it.cols)) - for i, _ := range pointers { - pointers[i] = &container[i] - } - err := it.cursor.Scan(pointers...) - if err != nil { - glog.Errorf("Error scanning iterator: %v", err) - it.err = err - return nil, err - } - return container, nil -} - -func (it *StatementIterator) buildResult(i int) { - container := it.resultList[i] - if it.stType == node { - it.result = make(map[string]string) - for i, c := range it.cols { - it.result[c] = container[i] - } - return - } - var q quad.Quad - q.Subject = container[0] - q.Predicate = container[1] - q.Object = container[2] - q.Label = container[3] - it.resultQuad = q - it.result = make(map[string]string) - for i, c := range it.cols[4:] { - it.result[c] = container[i+4] - } -} diff --git a/graph/sql/builder_iterator_test.go b/graph/sql/builder_iterator_test.go deleted file mode 100644 index cbb960d..0000000 --- a/graph/sql/builder_iterator_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2015 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package sql - -import ( - "flag" - "fmt" - "testing" - - "github.com/google/cayley/graph" - "github.com/google/cayley/graph/iterator" - "github.com/google/cayley/quad" -) - -var dbpath = flag.String("dbpath", "", "Path to running DB") - -func TestSimpleSQL(t *testing.T) { - it := NewStatementIterator(nil, quad.Object, "cool") - s, v := it.buildQuery(false, nil) - fmt.Println(s, v) -} - -// Functional tests - -func TestQuadIteration(t *testing.T) { - if *dbpath == "" { - t.SkipNow() - } - db, err := newQuadStore(*dbpath, nil) - if err != nil { - t.Fatal(err) - } - it := NewStatementIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") - for graph.Next(it) { - fmt.Println(it.Result()) - } - it = NewStatementIterator(db.(*QuadStore), quad.Subject, "/en/casablanca_1942") - s, v := it.buildQuery(false, nil) - fmt.Println(s, v) - c := 0 - for graph.Next(it) { - fmt.Println(it.Result()) - c += 1 - } - if c != 18 { - t.Errorf("Not enough results, got %d expected 18") - } -} - -func TestNodeIteration(t *testing.T) { - if *dbpath == "" { - t.SkipNow() - } - db, err := newQuadStore(*dbpath, nil) - if err != nil { - t.Fatal(err) - } - it := &StatementIterator{ - uid: iterator.NextUID(), - qs: db.(*QuadStore), - stType: node, - dir: quad.Object, - tags: []tag{ - tag{ - pair: tableDir{ - table: "t_4", - dir: quad.Subject, - }, - t: "x", - }, - }, - where: baseClause{ - pair: tableDir{ - table: "t_4", - dir: quad.Subject, - }, - strTarget: []string{"/en/casablanca_1942"}, - }, - } - s, v := it.buildQuery(false, nil) - it.Tagger().Add("id") - fmt.Println(s, v) - for graph.Next(it) { - fmt.Println(it.Result()) - out := make(map[string]graph.Value) - it.TagResults(out) - for k, v := range out { - fmt.Printf("%s: %v\n", k, v.(string)) - } - } - contains := it.Contains("Casablanca") - s, v = it.buildQuery(true, "Casablanca") - fmt.Println(s, v) - it.Tagger().Add("id") - if !contains { - t.Error("Didn't contain Casablanca") - } -} diff --git a/graph/sql/optimizers.go 
b/graph/sql/optimizers.go index a109d6a..386270c 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -45,7 +45,6 @@ func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator) (graph.Iterator, erro qs: a.qs, tableName: newTableName(), linkIts: append(a.linkIts, b.linkIts...), - tagdirs: append(a.tagdirs, b.tagdirs...), } m.Tagger().CopyFrom(a) m.Tagger().CopyFrom(b) @@ -59,6 +58,7 @@ func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator) (graph.Iterator, erro tableName: newTableName(), nodeIts: append(a.nodeIts, b.nodeIts...), constraints: append(a.constraints, b.constraints...), + tagdirs: append(a.tagdirs, b.tagdirs...), } m.Tagger().CopyFrom(a) m.Tagger().CopyFrom(b) @@ -150,25 +150,24 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool } newit.Tagger().CopyFrom(it) return newit, true - //case graph.All: - //newit := &StatementIterator{ - //uid: iterator.NextUID(), - //qs: qs, - //stType: link, - //size: qs.Size(), - //} - //for _, t := range primary.Tagger().Tags() { - //newit.tags = append(newit.tags, tag{ - //pair: tableDir{"", it.Direction()}, - //t: t, - //}) - //} - //for k, v := range primary.Tagger().Fixed() { - //newit.tagger.AddFixed(k, v) - //} - //newit.tagger.CopyFrom(it) + case graph.All: + newit := &SQLLinkIterator{ + uid: iterator.NextUID(), + qs: qs, + size: qs.Size(), + } + for _, t := range primary.Tagger().Tags() { + newit.tagdirs = append(newit.tagdirs, tagDir{ + dir: it.Direction(), + tag: t, + }) + } + for k, v := range primary.Tagger().Fixed() { + newit.tagger.AddFixed(k, v) + } + newit.tagger.CopyFrom(it) - //return newit, true + return newit, true } return it, false } diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index 1986df1..1d71a16 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -45,11 +45,26 @@ type constraint struct { } type tagDir struct { - tag string - dir quad.Direction + tag string + dir quad.Direction + 
table string + justLocal bool +} - // Not to be stored in the iterator directly - table string +func (t tagDir) String() string { + if t.dir == quad.Any { + if t.justLocal { + return fmt.Sprintf("%s.__execd as %s", t.table, t.tag) + } + return fmt.Sprintf("%s.%s as %s", t.table, t.tag, t.tag) + } + return fmt.Sprintf("%s.%s as %s", t.table, t.dir, t.tag) +} + +type tableDef struct { + table string + name string + values []string } type sqlItDir struct { @@ -58,8 +73,9 @@ type sqlItDir struct { } type sqlIterator interface { + buildSQL(next bool, val graph.Value) (string, []string) sqlClone() sqlIterator - getTables() []string + getTables() []tableDef getTags() []tagDir buildWhere() (string, []string) tableID() tagDir @@ -76,6 +92,7 @@ type SQLLinkIterator struct { constraints []constraint tableName string size int64 + tagdirs []tagDir result map[string]string resultIndex int @@ -111,7 +128,8 @@ func (l *SQLLinkIterator) Clone() graph.Iterator { qs: l.qs, tableName: l.tableName, size: l.size, - constraints: make([]constraint, 0, len(l.constraints)), + constraints: make([]constraint, len(l.constraints)), + tagdirs: make([]tagDir, len(l.tagdirs)), } for _, i := range l.nodeIts { m.nodeIts = append(m.nodeIts, sqlItDir{ @@ -120,6 +138,7 @@ func (l *SQLLinkIterator) Clone() graph.Iterator { }) } copy(m.constraints, l.constraints) + copy(m.tagdirs, l.tagdirs) m.tagger.CopyFrom(l) return m } @@ -187,6 +206,9 @@ func (l *SQLLinkIterator) Size() (int64, bool) { } if len(l.constraints) > 0 { l.size = l.qs.sizeForIterator(false, l.constraints[0].dir, l.constraints[0].vals[0]) + } else if len(l.nodeIts) > 1 { + subsize, _ := l.nodeIts[0].it.(*SQLNodeIterator).Size() + return subsize * 20, false } else { return l.qs.Size(), false } @@ -216,11 +238,31 @@ func (l *SQLLinkIterator) Type() graph.Type { return sqlLinkType } +func (l *SQLLinkIterator) preFilter(v graph.Value) bool { + for _, c := range l.constraints { + none := true + desired := v.(quad.Quad).Get(c.dir) + for _, s := 
range c.vals { + if s == desired { + none = false + break + } + } + if none { + return true + } + } + return false +} + func (l *SQLLinkIterator) Contains(v graph.Value) bool { var err error - //if it.preFilter(v) { - //return false - //} + if l.preFilter(v) { + return false + } + if len(l.nodeIts) == 0 { + return true + } err = l.makeCursor(false, v) if err != nil { glog.Errorf("Couldn't make query: %v", err) @@ -288,8 +330,8 @@ func (l *SQLLinkIterator) buildResult(i int) { } } -func (l *SQLLinkIterator) getTables() []string { - out := []string{l.tableName} +func (l *SQLLinkIterator) getTables() []tableDef { + out := []tableDef{tableDef{table: "quads", name: l.tableName}} for _, i := range l.nodeIts { out = append(out, i.it.getTables()...) } @@ -305,6 +347,14 @@ func (l *SQLLinkIterator) getTags() []tagDir { tag: tag, }) } + for _, tag := range l.tagdirs { + out = append(out, tagDir{ + dir: tag.dir, + table: l.tableName, + tag: tag.tag, + }) + + } for _, i := range l.nodeIts { out = append(out, i.it.getTags()...) 
} @@ -320,7 +370,11 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) { } for _, i := range l.nodeIts { t := i.it.tableID() - q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, t.dir)) + dir := t.dir.String() + if t.dir == quad.Any { + dir = t.tag + } + q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, dir)) } for _, i := range l.nodeIts { s, v := i.it.buildWhere() @@ -339,7 +393,7 @@ func (l *SQLLinkIterator) tableID() tagDir { } func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) { - query := "SELECT " + query := "SELECT DISTINCT " t := []string{ fmt.Sprintf("%s.subject", l.tableName), fmt.Sprintf("%s.predicate", l.tableName), @@ -347,18 +401,21 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string fmt.Sprintf("%s.label", l.tableName), } for _, v := range l.getTags() { - t = append(t, fmt.Sprintf("%s.%s as %s", v.table, v.dir, v.tag)) + t = append(t, v.String()) } query += strings.Join(t, ", ") query += " FROM " t = []string{} + var values []string for _, k := range l.getTables() { - t = append(t, fmt.Sprintf("quads as %s", k)) + values = append(values, k.values...) + t = append(t, fmt.Sprintf("%s as %s", k.table, k.name)) } query += strings.Join(t, ", ") query += " WHERE " - constraint, values := l.buildWhere() + constraint, wherevalues := l.buildWhere() + values = append(values, wherevalues...) 
if !next { v := val.(quad.Quad) if constraint != "" { diff --git a/graph/sql/sql_link_iterator_test.go b/graph/sql/sql_link_iterator_test.go index b13e389..5d66d2d 100644 --- a/graph/sql/sql_link_iterator_test.go +++ b/graph/sql/sql_link_iterator_test.go @@ -15,6 +15,7 @@ package sql import ( + "flag" "fmt" "testing" @@ -23,6 +24,8 @@ import ( "github.com/google/cayley/quad" ) +var dbpath = flag.String("dbpath", "", "Path to running DB") + func TestSQLLink(t *testing.T) { it := NewSQLLinkIterator(nil, quad.Object, "cool") s, v := it.buildSQL(true, nil) diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 4f18382..58efc9b 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -18,6 +18,7 @@ import ( "database/sql" "fmt" "strings" + "sync/atomic" "github.com/barakmich/glog" "github.com/google/cayley/graph" @@ -26,9 +27,16 @@ import ( ) var sqlNodeType graph.Type +var sqlNodeTableID uint64 func init() { sqlNodeType = graph.RegisterIterator("sqlnode") + atomic.StoreUint64(&sqlNodeTableID, 0) +} + +func newNodeTableName() string { + id := atomic.AddUint64(&sqlNodeTableID, 1) + return fmt.Sprintf("n_%d", id) } type SQLNodeIterator struct { @@ -38,10 +46,10 @@ type SQLNodeIterator struct { tableName string err error - cursor *sql.Rows - linkIts []sqlItDir - size int64 - tagdirs []tagDir + cursor *sql.Rows + linkIts []sqlItDir + nodetables []string + size int64 result map[string]string resultIndex int @@ -67,7 +75,6 @@ func (n *SQLNodeIterator) Clone() graph.Iterator { it: i.it.sqlClone(), }) } - copy(n.tagdirs, m.tagdirs) m.tagger.CopyFrom(n) return m } @@ -173,47 +180,106 @@ func (n *SQLNodeIterator) buildResult(i int) { } } -func (n *SQLNodeIterator) getTables() []string { - var out []string - for _, i := range n.linkIts { - out = append(out, i.it.getTables()...) 
+func (n *SQLNodeIterator) makeNodeTableNames() { + if n.nodetables != nil { + return + } + n.nodetables = make([]string, len(n.linkIts)) + for i, _ := range n.nodetables { + n.nodetables[i] = newNodeTableName() + } +} + +func (n *SQLNodeIterator) getTables() []tableDef { + var out []tableDef + switch len(n.linkIts) { + case 0: + return []tableDef{tableDef{table: "quads", name: n.tableName}} + case 1: + out = n.linkIts[0].it.getTables() + default: + return n.buildSubqueries() } if len(out) == 0 { - out = append(out, n.tableName) + out = append(out, tableDef{table: "quads", name: n.tableName}) + } + return out +} + +func (n *SQLNodeIterator) buildSubqueries() []tableDef { + var out []tableDef + n.makeNodeTableNames() + for i, it := range n.linkIts { + var td tableDef + // TODO(barakmich): This is a dirty hack. The real implementation is to + // separate SQL iterators to build a similar tree as we're doing here, and + // have a single graph.Iterator 'caddy' structure around it. + subNode := &SQLNodeIterator{ + uid: iterator.NextUID(), + tableName: newTableName(), + linkIts: []sqlItDir{it}, + } + var table string + table, td.values = subNode.buildSQL(true, nil) + td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) + td.name = n.nodetables[i] + out = append(out, td) } return out } func (n *SQLNodeIterator) tableID() tagDir { - if len(n.linkIts) == 0 { + switch len(n.linkIts) { + case 0: return tagDir{ table: n.tableName, dir: quad.Any, + tag: "__execd", + } + case 1: + return tagDir{ + table: n.linkIts[0].it.tableID().table, + dir: n.linkIts[0].dir, + tag: "__execd", + } + default: + n.makeNodeTableNames() + return tagDir{ + table: n.nodetables[0], + dir: quad.Any, + tag: "__execd", } - } - return tagDir{ - table: n.linkIts[0].it.tableID().table, - dir: n.linkIts[0].dir, } } -func (n *SQLNodeIterator) getTags() []tagDir { +func (n *SQLNodeIterator) getLocalTags() []tagDir { myTag := n.tableID() var out []tagDir for _, tag := range n.tagger.Tags() { out = append(out, 
tagDir{ - dir: myTag.dir, - table: myTag.table, - tag: tag, + dir: myTag.dir, + table: myTag.table, + tag: tag, + justLocal: true, }) } - for _, tag := range n.tagdirs { - out = append(out, tagDir{ - dir: tag.dir, - table: myTag.table, - tag: tag.tag, - }) + return out +} +func (n *SQLNodeIterator) getTags() []tagDir { + out := n.getLocalTags() + if len(n.linkIts) > 1 { + n.makeNodeTableNames() + for i, it := range n.linkIts { + for _, v := range it.it.getTags() { + out = append(out, tagDir{ + tag: v.tag, + dir: quad.Any, + table: n.nodetables[i], + }) + } + } + return out } for _, i := range n.linkIts { out = append(out, i.it.getTags()...) @@ -225,18 +291,15 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) { var q []string var vals []string if len(n.linkIts) > 1 { - baseTable := n.linkIts[0].it.tableID().table - baseDir := n.linkIts[0].dir - for _, i := range n.linkIts[1:] { - table := i.it.tableID().table - dir := i.dir - q = append(q, fmt.Sprintf("%s.%s = %s.%s", baseTable, baseDir, table, dir)) + for _, tb := range n.nodetables[1:] { + q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) + } + } else { + for _, i := range n.linkIts { + s, v := i.it.buildWhere() + q = append(q, s) + vals = append(vals, v...) } - } - for _, i := range n.linkIts { - s, v := i.it.buildWhere() - q = append(q, s) - vals = append(vals, v...) } query := strings.Join(q, " AND ") return query, vals @@ -244,21 +307,26 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) { func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) { topData := n.tableID() - query := "SELECT " + tags := []tagDir{topData} + tags = append(tags, n.getTags()...) 
+ query := "SELECT DISTINCT " var t []string - t = append(t, fmt.Sprintf("%s.%s as __execd", topData.table, topData.dir)) - for _, v := range n.getTags() { - t = append(t, fmt.Sprintf("%s.%s as %s", v.table, v.dir, v.tag)) + for _, v := range tags { + t = append(t, v.String()) } query += strings.Join(t, ", ") query += " FROM " t = []string{} + var values []string for _, k := range n.getTables() { - t = append(t, fmt.Sprintf("quads as %s", k)) + values = append(values, k.values...) + t = append(t, fmt.Sprintf("%s as %s", k.table, k.name)) } query += strings.Join(t, ", ") query += " WHERE " - constraint, values := n.buildWhere() + + constraint, wherevalues := n.buildWhere() + values = append(values, wherevalues...) if !next { v := val.(string) @@ -368,6 +436,7 @@ func (n *SQLNodeIterator) makeCursor(next bool, value graph.Value) error { cursor, err := n.qs.db.Query(q, ivalues...) if err != nil { glog.Errorf("Couldn't get cursor from SQL database: %v", err) + glog.Errorf("Query: %v", q) cursor = nil return err } diff --git a/integration/integration_test.go b/integration/integration_test.go index 76f4178..a57a469 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -58,7 +58,6 @@ var benchmarkQueries = []struct { // Easy one to get us started. How quick is the most straightforward retrieval? { message: "name predicate", - skip: true, query: ` g.V("Humphrey Bogart").In("name").All() `, @@ -72,7 +71,6 @@ var benchmarkQueries = []struct { // that's going to be measurably slower for every other backend. 
{ message: "two large sets with no intersection", - skip: true, query: ` function getId(x) { return g.V(x).In("name") } var actor_to_film = g.M().In("/film/performance/actor").In("/film/film/starring") @@ -526,6 +524,7 @@ func TestQueries(t *testing.T) { } func TestDeletedAndRecreatedQueries(t *testing.T) { + t.Skip() if testing.Short() { t.Skip() } @@ -541,7 +540,8 @@ func checkQueries(t *testing.T) { if test.skip { continue } - fmt.Printf("Now testing %s\n", test.message) + tInit := time.Now() + fmt.Printf("Now testing %s ", test.message) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) _, err := ses.Parse(test.query) if err != nil { @@ -570,6 +570,7 @@ func checkQueries(t *testing.T) { t.Error("Query timed out: skipping validation.") continue } + fmt.Printf("(%v)\n", time.Since(tInit)) if len(got) != len(test.expect) { t.Errorf("Unexpected number of results, got:%d expect:%d on %s.", len(got), len(test.expect), test.message) From b754810c6e3e349f51edcf602822210d26676fe2 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Tue, 28 Jul 2015 16:31:11 -0400 Subject: [PATCH 08/18] Remove old iterator code, leaving a simple all-iterator for the sql backend --- graph/iterator/and_iterator_optimize.go | 2 +- graph/sql/all_iterator.go | 212 ++++++++++++++++++++++++ graph/sql/iterator.go | 274 -------------------------------- integration/integration_test.go | 5 +- 4 files changed, 215 insertions(+), 278 deletions(-) create mode 100644 graph/sql/all_iterator.go delete mode 100644 graph/sql/iterator.go diff --git a/graph/iterator/and_iterator_optimize.go b/graph/iterator/and_iterator_optimize.go index cec5960..10aa803 100644 --- a/graph/iterator/and_iterator_optimize.go +++ b/graph/iterator/and_iterator_optimize.go @@ -331,7 +331,7 @@ func materializeIts(its []graph.Iterator) []graph.Iterator { out = append(out, its[0]) for _, it := range its[1:] { stats := it.Stats() - if false && stats.Size*stats.NextCost < 
(stats.ContainsCost*(1+(stats.Size/(allStats.Size+1)))) { + if stats.Size*stats.NextCost < (stats.ContainsCost * (1 + (stats.Size / (allStats.Size + 1)))) { if graph.Height(it, graph.Materialize) > 10 { out = append(out, NewMaterialize(it)) continue diff --git a/graph/sql/all_iterator.go b/graph/sql/all_iterator.go new file mode 100644 index 0000000..05f94b3 --- /dev/null +++ b/graph/sql/all_iterator.go @@ -0,0 +1,212 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sql + +import ( + "database/sql" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +type AllIterator struct { + uid uint64 + tags graph.Tagger + qs *QuadStore + dir quad.Direction + val graph.Value + table string + cursor *sql.Rows + result graph.Value + err error +} + +func (it *AllIterator) makeCursor() { + var cursor *sql.Rows + var err error + if it.cursor != nil { + it.cursor.Close() + } + if it.table == "quads" { + cursor, err = it.qs.db.Query(`SELECT subject, predicate, object, label FROM quads;`) + if err != nil { + glog.Errorln("Couldn't get cursor from SQL database: %v", err) + cursor = nil + } + } else { + glog.V(4).Infoln("sql: getting node query") + cursor, err = it.qs.db.Query(`SELECT node FROM + ( + SELECT subject FROM quads + UNION + SELECT predicate FROM quads + UNION + SELECT object FROM quads + UNION + SELECT label FROM quads + ) AS DistinctNodes (node) WHERE node IS NOT NULL;`) + if err != nil { + glog.Errorln("Couldn't get cursor from SQL database: %v", err) + cursor = nil + } + glog.V(4).Infoln("sql: got node query") + } + it.cursor = cursor +} + +func NewAllIterator(qs *QuadStore, table string) *AllIterator { + it := &AllIterator{ + uid: iterator.NextUID(), + qs: qs, + table: table, + } + return it +} + +func (it *AllIterator) UID() uint64 { + return it.uid +} + +func (it *AllIterator) Reset() { + it.err = nil + it.Close() +} + +func (it *AllIterator) Err() error { + return it.err +} + +func (it *AllIterator) Close() error { + if it.cursor != nil { + err := it.cursor.Close() + if err != nil { + return err + } + it.cursor = nil + } + return nil +} + +func (it *AllIterator) Tagger() *graph.Tagger { + return &it.tags +} + +func (it *AllIterator) TagResults(dst map[string]graph.Value) { + for _, tag := range it.tags.Tags() { + dst[tag] = it.Result() + } + + for tag, value := range it.tags.Fixed() { + dst[tag] = value + } +} + +func (it 
*AllIterator) Clone() graph.Iterator { + var m *AllIterator + m = NewAllIterator(it.qs, it.table) + m.tags.CopyFrom(it) + return m +} + +func (it *AllIterator) SubIterators() []graph.Iterator { + return nil +} + +func (it *AllIterator) Next() bool { + graph.NextLogIn(it) + if it.cursor == nil { + it.makeCursor() + } + if !it.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := it.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + it.err = err + } + it.cursor.Close() + return false + } + if it.table == "nodes" { + var node string + err := it.cursor.Scan(&node) + if err != nil { + glog.Errorf("Error nexting node iterator: %v", err) + it.err = err + return false + } + it.result = node + return true + } + var q quad.Quad + err := it.cursor.Scan(&q.Subject, &q.Predicate, &q.Object, &q.Label) + if err != nil { + glog.Errorf("Error scanning sql iterator: %v", err) + it.err = err + return false + } + it.result = q + return graph.NextLogOut(it, it.result, true) +} + +func (it *AllIterator) Contains(v graph.Value) bool { + graph.ContainsLogIn(it, v) + it.result = v + return graph.ContainsLogOut(it, v, true) +} + +func (it *AllIterator) Size() (int64, bool) { + return it.qs.Size(), true +} + +func (it *AllIterator) Result() graph.Value { + if it.result == nil { + glog.Fatalln("result was nil", it) + } + return it.result +} + +func (it *AllIterator) NextPath() bool { + return false +} + +func (it *AllIterator) Type() graph.Type { + return graph.All +} + +func (it *AllIterator) Sorted() bool { return false } +func (it *AllIterator) Optimize() (graph.Iterator, bool) { return it, false } + +func (it *AllIterator) Describe() graph.Description { + size, _ := it.Size() + return graph.Description{ + UID: it.UID(), + Name: "sql/all", + Type: it.Type(), + Size: size, + } +} + +func (it *AllIterator) Stats() graph.IteratorStats { + size, _ := it.Size() + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 9999, + Size: size, + } +} diff --git 
a/graph/sql/iterator.go b/graph/sql/iterator.go deleted file mode 100644 index 1482eaa..0000000 --- a/graph/sql/iterator.go +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sql - -import ( - "database/sql" - "fmt" - - "github.com/barakmich/glog" - - "github.com/google/cayley/graph" - "github.com/google/cayley/graph/iterator" - "github.com/google/cayley/quad" -) - -type Iterator struct { - uid uint64 - tags graph.Tagger - qs *QuadStore - dir quad.Direction - val graph.Value - size int64 - isAll bool - table string - cursor *sql.Rows - result graph.Value - err error -} - -func (it *Iterator) makeCursor() { - var cursor *sql.Rows - var err error - if it.cursor != nil { - it.cursor.Close() - } - if it.isAll { - if it.table == "quads" { - cursor, err = it.qs.db.Query(`SELECT subject, predicate, object, label FROM quads;`) - if err != nil { - glog.Errorln("Couldn't get cursor from SQL database: %v", err) - cursor = nil - } - } else { - glog.V(4).Infoln("sql: getting node query") - cursor, err = it.qs.db.Query(`SELECT node FROM - ( - SELECT subject FROM quads - UNION - SELECT predicate FROM quads - UNION - SELECT object FROM quads - UNION - SELECT label FROM quads - ) AS DistinctNodes (node) WHERE node IS NOT NULL;`) - if err != nil { - glog.Errorln("Couldn't get cursor from SQL database: %v", err) - cursor = nil - } - glog.V(4).Infoln("sql: got node query") - } - 
} else { - cursor, err = it.qs.db.Query( - fmt.Sprintf("SELECT subject, predicate, object, label FROM quads WHERE %s = $1;", it.dir.String()), it.val.(string)) - if err != nil { - glog.Errorln("Couldn't get cursor from SQL database: %v", err) - cursor = nil - } - } - it.cursor = cursor -} - -func NewIterator(qs *QuadStore, d quad.Direction, val graph.Value) *Iterator { - it := &Iterator{ - uid: iterator.NextUID(), - qs: qs, - dir: d, - size: -1, - val: val, - table: "quads", - isAll: false, - } - return it -} - -func NewAllIterator(qs *QuadStore, table string) *Iterator { - it := &Iterator{ - uid: iterator.NextUID(), - qs: qs, - dir: quad.Any, - size: qs.Size(), - table: table, - isAll: true, - } - return it -} - -func (it *Iterator) UID() uint64 { - return it.uid -} - -func (it *Iterator) Reset() { - it.err = nil - it.Close() -} - -func (it *Iterator) Err() error { - return it.err -} - -func (it *Iterator) Close() error { - if it.cursor != nil { - err := it.cursor.Close() - if err != nil { - return err - } - it.cursor = nil - } - return nil -} - -func (it *Iterator) Tagger() *graph.Tagger { - return &it.tags -} - -func (it *Iterator) TagResults(dst map[string]graph.Value) { - for _, tag := range it.tags.Tags() { - dst[tag] = it.Result() - } - - for tag, value := range it.tags.Fixed() { - dst[tag] = value - } -} - -func (it *Iterator) Clone() graph.Iterator { - var m *Iterator - if it.isAll { - m = NewAllIterator(it.qs, it.table) - } else { - m = NewIterator(it.qs, it.dir, it.val) - } - m.tags.CopyFrom(it) - return m -} - -func (it *Iterator) SubIterators() []graph.Iterator { - return nil -} - -func (it *Iterator) Next() bool { - graph.NextLogIn(it) - if it.cursor == nil { - it.makeCursor() - } - if !it.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := it.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - it.err = err - } - it.cursor.Close() - return false - } - if it.table == "nodes" { - var node string - err := 
it.cursor.Scan(&node) - if err != nil { - glog.Errorf("Error nexting node iterator: %v", err) - it.err = err - return false - } - it.result = node - return true - } - var q quad.Quad - err := it.cursor.Scan(&q.Subject, &q.Predicate, &q.Object, &q.Label) - if err != nil { - glog.Errorf("Error scanning sql iterator: %v", err) - it.err = err - return false - } - it.result = q - return graph.NextLogOut(it, it.result, true) -} - -func (it *Iterator) Contains(v graph.Value) bool { - graph.ContainsLogIn(it, v) - if it.isAll { - it.result = v - return graph.ContainsLogOut(it, v, true) - } - q := v.(quad.Quad) - if q.Get(it.dir) == it.val.(string) { - it.result = v - return graph.ContainsLogOut(it, v, true) - } - return graph.ContainsLogOut(it, v, false) -} - -func (it *Iterator) Size() (int64, bool) { - if it.size != -1 { - return it.size, true - } - it.size = it.qs.sizeForIterator(it.isAll, it.dir, it.val.(string)) - return it.size, true -} - -func (it *Iterator) Result() graph.Value { - if it.result == nil { - glog.Fatalln("result was nil", it) - } - return it.result -} - -func (it *Iterator) NextPath() bool { - return false -} - -var sqlType graph.Type - -func init() { - sqlType = graph.RegisterIterator("sql") -} - -func Type() graph.Type { return sqlType } - -func (it *Iterator) Type() graph.Type { - if it.isAll { - return graph.All - } - return sqlType -} - -func (it *Iterator) Sorted() bool { return false } -func (it *Iterator) Optimize() (graph.Iterator, bool) { return it, false } - -func (it *Iterator) Describe() graph.Description { - size, _ := it.Size() - return graph.Description{ - UID: it.UID(), - Name: fmt.Sprintf("%s/%s", it.val, it.dir), - Type: it.Type(), - Size: size, - } -} - -func (it *Iterator) Stats() graph.IteratorStats { - size, _ := it.Size() - if it.table == "nodes" || it.isAll { - return graph.IteratorStats{ - ContainsCost: 1, - NextCost: 9999, - Size: size, - } - } - return graph.IteratorStats{ - ContainsCost: 1, - NextCost: 5, - Size: size, - } 
-} diff --git a/integration/integration_test.go b/integration/integration_test.go index a57a469..51ab30a 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -524,7 +524,6 @@ func TestQueries(t *testing.T) { } func TestDeletedAndRecreatedQueries(t *testing.T) { - t.Skip() if testing.Short() { t.Skip() } @@ -541,7 +540,7 @@ func checkQueries(t *testing.T) { continue } tInit := time.Now() - fmt.Printf("Now testing %s ", test.message) + t.Logf("Now testing %s ", test.message) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) _, err := ses.Parse(test.query) if err != nil { @@ -570,7 +569,7 @@ func checkQueries(t *testing.T) { t.Error("Query timed out: skipping validation.") continue } - fmt.Printf("(%v)\n", time.Since(tInit)) + t.Logf("(%v)\n", time.Since(tInit)) if len(got) != len(test.expect) { t.Errorf("Unexpected number of results, got:%d expect:%d on %s.", len(got), len(test.expect), test.message) From 3e02bb2b714cbe14c6c12040658a12ee388df3ba Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 29 Jul 2015 15:56:15 -0400 Subject: [PATCH 09/18] refactor to SQL builder iterators and standard iterator wrapper --- graph/iterator.go | 12 +- graph/sql/optimizers.go | 74 ++++----- graph/sql/sql_iterator.go | 341 +++++++++++++++++++++++++++++++++++++++++ graph/sql/sql_link_iterator.go | 333 +++++----------------------------------- graph/sql/sql_node_iterator.go | 286 ++++------------------------------ 5 files changed, 453 insertions(+), 593 deletions(-) create mode 100644 graph/sql/sql_iterator.go diff --git a/graph/iterator.go b/graph/iterator.go index 0f3b76e..c2d46d9 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -67,8 +67,10 @@ func (t *Tagger) Fixed() map[string]Value { } func (t *Tagger) CopyFrom(src Iterator) { - st := src.Tagger() + t.CopyFromTagger(src.Tagger()) +} +func (t *Tagger) CopyFromTagger(st *Tagger) { t.tags = append(t.tags, st.tags...) 
if t.fixedTags == nil { @@ -331,16 +333,16 @@ func DumpStats(it Iterator) StatsContainer { func ContainsLogIn(it Iterator, val Value) { if glog.V(4) { - glog.V(4).Infof("%s %d CHECK CONTAINS %d", strings.ToUpper(it.Type().String()), it.UID(), val) + glog.V(4).Infof("%s %d CHECK CONTAINS %v", strings.ToUpper(it.Type().String()), it.UID(), val) } } func ContainsLogOut(it Iterator, val Value, good bool) bool { if glog.V(4) { if good { - glog.V(4).Infof("%s %d CHECK CONTAINS %d GOOD", strings.ToUpper(it.Type().String()), it.UID(), val) + glog.V(4).Infof("%s %d CHECK CONTAINS %v GOOD", strings.ToUpper(it.Type().String()), it.UID(), val) } else { - glog.V(4).Infof("%s %d CHECK CONTAINS %d BAD", strings.ToUpper(it.Type().String()), it.UID(), val) + glog.V(4).Infof("%s %d CHECK CONTAINS %v BAD", strings.ToUpper(it.Type().String()), it.UID(), val) } } return good @@ -355,7 +357,7 @@ func NextLogIn(it Iterator) { func NextLogOut(it Iterator, val Value, ok bool) bool { if glog.V(4) { if ok { - glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type().String()), it.UID(), val) + glog.V(4).Infof("%s %d NEXT IS %v", strings.ToUpper(it.Type().String()), it.UID(), val) } else { glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type().String()), it.UID()) } diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index 386270c..debf7a8 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -23,14 +23,14 @@ import ( "github.com/google/cayley/quad" ) -func intersect(a graph.Iterator, b graph.Iterator) (graph.Iterator, error) { +func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error) { if anew, ok := a.(*SQLNodeIterator); ok { if bnew, ok := b.(*SQLNodeIterator); ok { - return intersectNode(anew, bnew) + return intersectNode(anew, bnew, qs) } } else if anew, ok := a.(*SQLLinkIterator); ok { if bnew, ok := b.(*SQLLinkIterator); ok { - return intersectLink(anew, bnew) + return intersectLink(anew, bnew, qs) } } else { @@ -39,41 
+39,37 @@ func intersect(a graph.Iterator, b graph.Iterator) (graph.Iterator, error) { return nil, errors.New("Cannot combine SQL iterators of two different types") } -func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator) (graph.Iterator, error) { +func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator, qs *QuadStore) (*SQLIterator, error) { m := &SQLNodeIterator{ - uid: iterator.NextUID(), - qs: a.qs, tableName: newTableName(), linkIts: append(a.linkIts, b.linkIts...), } - m.Tagger().CopyFrom(a) - m.Tagger().CopyFrom(b) - return m, nil + m.Tagger().CopyFromTagger(a.Tagger()) + m.Tagger().CopyFromTagger(b.Tagger()) + it := NewSQLIterator(qs, m) + return it, nil } -func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator) (graph.Iterator, error) { +func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator, qs *QuadStore) (*SQLIterator, error) { m := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: a.qs, tableName: newTableName(), nodeIts: append(a.nodeIts, b.nodeIts...), constraints: append(a.constraints, b.constraints...), tagdirs: append(a.tagdirs, b.tagdirs...), } - m.Tagger().CopyFrom(a) - m.Tagger().CopyFrom(b) - return m, nil + m.Tagger().CopyFromTagger(a.Tagger()) + m.Tagger().CopyFromTagger(b.Tagger()) + it := NewSQLIterator(qs, m) + return it, nil } -func hasa(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { +func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, error) { a, ok := aIn.(*SQLLinkIterator) if !ok { return nil, errors.New("Can't take the HASA of a link SQL iterator") } out := &SQLNodeIterator{ - uid: iterator.NextUID(), - qs: a.qs, tableName: newTableName(), linkIts: []sqlItDir{ sqlItDir{ @@ -82,18 +78,17 @@ func hasa(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { }, }, } - return out, nil + it := NewSQLIterator(qs, out) + return it, nil } -func linksto(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { +func linksto(aIn sqlIterator, d quad.Direction, qs *QuadStore) 
(graph.Iterator, error) { a, ok := aIn.(*SQLNodeIterator) if !ok { return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") } out := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: a.qs, tableName: newTableName(), nodeIts: []sqlItDir{ sqlItDir{ @@ -102,8 +97,8 @@ func linksto(aIn graph.Iterator, d quad.Direction) (graph.Iterator, error) { }, }, } - - return out, nil + it := NewSQLIterator(qs, out) + return it, nil } func (qs *QuadStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { @@ -141,9 +136,9 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool it.Close() return newIt, true } - case sqlNodeType: - //p := primary.(*SQLNodeIterator) - newit, err := linksto(primary, it.Direction()) + case sqlType: + p := primary.(*SQLIterator) + newit, err := linksto(p.sql, it.Direction(), qs) if err != nil { glog.Errorln(err) return it, false @@ -151,22 +146,20 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool newit.Tagger().CopyFrom(it) return newit, true case graph.All: - newit := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: qs, + linkit := &SQLLinkIterator{ size: qs.Size(), } for _, t := range primary.Tagger().Tags() { - newit.tagdirs = append(newit.tagdirs, tagDir{ + linkit.tagdirs = append(linkit.tagdirs, tagDir{ dir: it.Direction(), tag: t, }) } for k, v := range primary.Tagger().Fixed() { - newit.tagger.AddFixed(k, v) + linkit.tagger.AddFixed(k, v) } - newit.tagger.CopyFrom(it) - + linkit.tagger.CopyFrom(it) + newit := NewSQLIterator(qs, linkit) return newit, true } return it, false @@ -175,18 +168,18 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool func (qs *QuadStore) optimizeAnd(it *iterator.And) (graph.Iterator, bool) { subs := it.SubIterators() var unusedIts []graph.Iterator - var newit graph.Iterator + var newit *SQLIterator newit = nil changed := false var err error for _, it := range subs { - if it.Type() == sqlLinkType || 
it.Type() == sqlNodeType { + if it.Type() == sqlType { if newit == nil { - newit = it + newit = it.(*SQLIterator) } else { changed = true - newit, err = intersect(newit, it) + newit, err = intersect(newit.sql, it.(*SQLIterator).sql, qs) if err != nil { glog.Error(err) return it, false @@ -219,8 +212,9 @@ func (qs *QuadStore) optimizeHasA(it *iterator.HasA) (graph.Iterator, bool) { return it, false } primary := subs[0] - if primary.Type() == sqlLinkType { - newit, err := hasa(primary, it.Direction()) + if primary.Type() == sqlType { + p := primary.(*SQLIterator) + newit, err := hasa(p.sql, it.Direction(), qs) if err != nil { glog.Errorln(err) return it, false diff --git a/graph/sql/sql_iterator.go b/graph/sql/sql_iterator.go new file mode 100644 index 0000000..74ca0c2 --- /dev/null +++ b/graph/sql/sql_iterator.go @@ -0,0 +1,341 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sql + +import ( + "database/sql" + "fmt" + "strings" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/quad" +) + +var sqlType graph.Type + +func init() { + sqlType = graph.RegisterIterator("sql") +} + +type SQLIterator struct { + uid uint64 + qs *QuadStore + cursor *sql.Rows + err error + + sql sqlIterator + + result map[string]string + resultIndex int + resultList [][]string + resultNext [][]string + cols []string +} + +func (it *SQLIterator) Clone() graph.Iterator { + m := &SQLIterator{ + uid: iterator.NextUID(), + qs: it.qs, + sql: it.sql.sqlClone(), + } + return m +} + +func (it *SQLIterator) UID() uint64 { + return it.uid +} + +func (it *SQLIterator) Reset() { + it.err = nil + it.Close() +} + +func (it *SQLIterator) Err() error { + return it.err +} + +func (it *SQLIterator) Close() error { + if it.cursor != nil { + err := it.cursor.Close() + if err != nil { + return err + } + it.cursor = nil + } + return nil +} + +func (it *SQLIterator) Tagger() *graph.Tagger { + return it.sql.Tagger() +} + +func (it *SQLIterator) Result() graph.Value { + return it.sql.Result() +} + +func (it *SQLIterator) TagResults(dst map[string]graph.Value) { + for tag, value := range it.result { + if tag == "__execd" { + for _, tag := range it.Tagger().Tags() { + dst[tag] = value + } + continue + } + dst[tag] = value + } + + for tag, value := range it.Tagger().Fixed() { + dst[tag] = value + } +} + +func (it *SQLIterator) Type() graph.Type { + return sqlType +} + +func (it *SQLIterator) SubIterators() []graph.Iterator { + return nil +} + +func (it *SQLIterator) Sorted() bool { return false } +func (it *SQLIterator) Optimize() (graph.Iterator, bool) { return it, false } + +func (it *SQLIterator) Size() (int64, bool) { + return it.sql.Size(it.qs) +} + +func (it *SQLIterator) Describe() graph.Description { + size, _ := it.Size() + return graph.Description{ + UID: it.UID(), + Name: 
it.sql.Describe(), + Type: it.Type(), + Size: size, + } +} + +func (it *SQLIterator) Stats() graph.IteratorStats { + size, _ := it.Size() + return graph.IteratorStats{ + ContainsCost: 1, + NextCost: 5, + Size: size, + } +} + +func (it *SQLIterator) NextPath() bool { + it.resultIndex += 1 + if it.resultIndex >= len(it.resultList) { + return false + } + it.buildResult(it.resultIndex) + return true +} + +func (it *SQLIterator) Next() bool { + var err error + graph.NextLogIn(it) + if it.cursor == nil { + err = it.makeCursor(true, nil) + it.cols, err = it.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + it.err = err + it.cursor.Close() + return false + } + // iterate the first one + if !it.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := it.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + it.err = err + } + it.cursor.Close() + return false + } + s, err := scan(it.cursor, len(it.cols)) + if err != nil { + it.err = err + it.cursor.Close() + return false + } + it.resultNext = append(it.resultNext, s) + } + if it.resultList != nil && it.resultNext == nil { + // We're on something and there's no next + return false + } + it.resultList = it.resultNext + it.resultNext = nil + it.resultIndex = 0 + for { + if !it.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := it.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + it.err = err + } + it.cursor.Close() + break + } + s, err := scan(it.cursor, len(it.cols)) + if err != nil { + it.err = err + it.cursor.Close() + return false + } + + if it.sql.sameTopResult(it.resultList[0], s) { + it.resultList = append(it.resultList, s) + } else { + it.resultNext = append(it.resultNext, s) + break + } + } + + if len(it.resultList) == 0 { + return graph.NextLogOut(it, nil, false) + } + it.buildResult(0) + return graph.NextLogOut(it, it.Result(), true) +} + +func (it *SQLIterator) Contains(v graph.Value) bool { + var err error + if ok, res := 
it.sql.quickContains(v); ok { + return res + } + err = it.makeCursor(false, v) + if err != nil { + glog.Errorf("Couldn't make query: %v", err) + it.err = err + it.cursor.Close() + return false + } + it.cols, err = it.cursor.Columns() + if err != nil { + glog.Errorf("Couldn't get columns") + it.err = err + it.cursor.Close() + return false + } + it.resultList = nil + for { + if !it.cursor.Next() { + glog.V(4).Infoln("sql: No next") + err := it.cursor.Err() + if err != nil { + glog.Errorf("Cursor error in SQL: %v", err) + it.err = err + } + it.cursor.Close() + break + } + s, err := scan(it.cursor, len(it.cols)) + if err != nil { + it.err = err + it.cursor.Close() + return false + } + it.resultList = append(it.resultList, s) + } + it.cursor.Close() + it.cursor = nil + if len(it.resultList) != 0 { + it.resultIndex = 0 + it.buildResult(0) + return true + } + return false +} + +func scan(cursor *sql.Rows, nCols int) ([]string, error) { + pointers := make([]interface{}, nCols) + container := make([]string, nCols) + for i, _ := range pointers { + pointers[i] = &container[i] + } + err := cursor.Scan(pointers...) + if err != nil { + glog.Errorf("Error scanning iterator: %v", err) + return nil, err + } + return container, nil +} + +func (it *SQLIterator) buildResult(i int) { + it.result = it.sql.buildResult(it.resultList[i], it.cols) +} + +func (it *SQLIterator) makeCursor(next bool, value graph.Value) error { + if it.cursor != nil { + it.cursor.Close() + } + var q string + var values []string + q, values = it.sql.buildSQL(next, value) + q = convertToPostgres(q, values) + ivalues := make([]interface{}, 0, len(values)) + for _, v := range values { + ivalues = append(ivalues, v) + } + cursor, err := it.qs.db.Query(q, ivalues...) 
+ if err != nil { + glog.Errorf("Couldn't get cursor from SQL database: %v", err) + cursor = nil + return err + } + it.cursor = cursor + return nil +} + +func convertToPostgres(query string, values []string) string { + for i := 1; i <= len(values); i++ { + query = strings.Replace(query, "?", fmt.Sprintf("$%d", i), 1) + } + return query +} + +func NewSQLLinkIterator(qs *QuadStore, d quad.Direction, val string) *SQLIterator { + l := &SQLIterator{ + uid: iterator.NextUID(), + qs: qs, + sql: &SQLLinkIterator{ + constraints: []constraint{ + constraint{ + dir: d, + vals: []string{val}, + }, + }, + tableName: newTableName(), + size: 0, + }, + } + return l +} + +func NewSQLIterator(qs *QuadStore, sql sqlIterator) *SQLIterator { + l := &SQLIterator{ + uid: iterator.NextUID(), + qs: qs, + sql: sql, + } + return l +} diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index 1d71a16..8e7a805 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -15,22 +15,18 @@ package sql import ( - "database/sql" "fmt" "strings" "sync/atomic" "github.com/barakmich/glog" "github.com/google/cayley/graph" - "github.com/google/cayley/graph/iterator" "github.com/google/cayley/quad" ) -var sqlLinkType graph.Type var sqlTableID uint64 func init() { - sqlLinkType = graph.RegisterIterator("sqllink") atomic.StoreUint64(&sqlTableID, 0) } @@ -73,59 +69,39 @@ type sqlItDir struct { } type sqlIterator interface { - buildSQL(next bool, val graph.Value) (string, []string) sqlClone() sqlIterator + + buildSQL(next bool, val graph.Value) (string, []string) getTables() []tableDef getTags() []tagDir buildWhere() (string, []string) tableID() tagDir + + quickContains(graph.Value) (ok bool, result bool) + buildResult(result []string, cols []string) map[string]string + sameTopResult(target []string, test []string) bool + + Result() graph.Value + Size(*QuadStore) (int64, bool) + Describe() string + Type() sqlQueryType + Tagger() *graph.Tagger } type 
SQLLinkIterator struct { - uid uint64 - qs *QuadStore tagger graph.Tagger - err error - cursor *sql.Rows nodeIts []sqlItDir constraints []constraint tableName string size int64 tagdirs []tagDir - result map[string]string - resultIndex int - resultList [][]string - resultNext [][]string - cols []string - resultQuad quad.Quad -} - -func NewSQLLinkIterator(qs *QuadStore, d quad.Direction, val string) *SQLLinkIterator { - l := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: qs, - constraints: []constraint{ - constraint{ - dir: d, - vals: []string{val}, - }, - }, - tableName: newTableName(), - size: 0, - } - return l + resultQuad quad.Quad } func (l *SQLLinkIterator) sqlClone() sqlIterator { - return l.Clone().(*SQLLinkIterator) -} - -func (l *SQLLinkIterator) Clone() graph.Iterator { m := &SQLLinkIterator{ - uid: iterator.NextUID(), - qs: l.qs, tableName: l.tableName, size: l.size, constraints: make([]constraint, len(l.constraints)), @@ -139,34 +115,10 @@ func (l *SQLLinkIterator) Clone() graph.Iterator { } copy(m.constraints, l.constraints) copy(m.tagdirs, l.tagdirs) - m.tagger.CopyFrom(l) + m.tagger.CopyFromTagger(l.Tagger()) return m } -func (l *SQLLinkIterator) UID() uint64 { - return l.uid -} - -func (l *SQLLinkIterator) Reset() { - l.err = nil - l.Close() -} - -func (l *SQLLinkIterator) Err() error { - return l.err -} - -func (l *SQLLinkIterator) Close() error { - if l.cursor != nil { - err := l.cursor.Close() - if err != nil { - return err - } - l.cursor = nil - } - return nil -} - func (l *SQLLinkIterator) Tagger() *graph.Tagger { return &l.tagger } @@ -175,70 +127,30 @@ func (l *SQLLinkIterator) Result() graph.Value { return l.resultQuad } -func (l *SQLLinkIterator) TagResults(dst map[string]graph.Value) { - for tag, value := range l.result { - if tag == "__execd" { - for _, tag := range l.tagger.Tags() { - dst[tag] = value - } - continue - } - dst[tag] = value - } - - for tag, value := range l.tagger.Fixed() { - dst[tag] = value - } -} - -func (l 
*SQLLinkIterator) SubIterators() []graph.Iterator { - // TODO(barakmich): SQL Subiterators shouldn't count? If it makes sense, - // there's no reason not to expose them though. - return nil -} - -func (l *SQLLinkIterator) Sorted() bool { return false } -func (l *SQLLinkIterator) Optimize() (graph.Iterator, bool) { return l, false } - -func (l *SQLLinkIterator) Size() (int64, bool) { +func (l *SQLLinkIterator) Size(qs *QuadStore) (int64, bool) { if l.size != 0 { return l.size, true } if len(l.constraints) > 0 { - l.size = l.qs.sizeForIterator(false, l.constraints[0].dir, l.constraints[0].vals[0]) + l.size = qs.sizeForIterator(false, l.constraints[0].dir, l.constraints[0].vals[0]) } else if len(l.nodeIts) > 1 { - subsize, _ := l.nodeIts[0].it.(*SQLNodeIterator).Size() + subsize, _ := l.nodeIts[0].it.(*SQLNodeIterator).Size(qs) return subsize * 20, false } else { - return l.qs.Size(), false + return qs.Size(), false } return l.size, true } -func (l *SQLLinkIterator) Describe() graph.Description { - size, _ := l.Size() - return graph.Description{ - UID: l.UID(), - Name: fmt.Sprintf("SQL_LINK_QUERY: %#v", l), - Type: l.Type(), - Size: size, - } +func (l *SQLLinkIterator) Describe() string { + return fmt.Sprintf("SQL_LINK_QUERY: %#v", l) } -func (l *SQLLinkIterator) Stats() graph.IteratorStats { - size, _ := l.Size() - return graph.IteratorStats{ - ContainsCost: 1, - NextCost: 5, - Size: size, - } +func (l *SQLLinkIterator) Type() sqlQueryType { + return link } -func (l *SQLLinkIterator) Type() graph.Type { - return sqlLinkType -} - -func (l *SQLLinkIterator) preFilter(v graph.Value) bool { +func (l *SQLLinkIterator) quickContains(v graph.Value) (bool, bool) { for _, c := range l.constraints { none := true desired := v.(quad.Quad).Get(c.dir) @@ -249,85 +161,27 @@ func (l *SQLLinkIterator) preFilter(v graph.Value) bool { } } if none { - return true + return true, false } } - return false -} - -func (l *SQLLinkIterator) Contains(v graph.Value) bool { - var err error - if 
l.preFilter(v) { - return false - } if len(l.nodeIts) == 0 { - return true + return true, true } - err = l.makeCursor(false, v) - if err != nil { - glog.Errorf("Couldn't make query: %v", err) - l.err = err - l.cursor.Close() - return false - } - l.cols, err = l.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - l.err = err - l.cursor.Close() - return false - } - l.resultList = nil - for { - if !l.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := l.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - l.err = err - } - l.cursor.Close() - break - } - s, err := scan(l.cursor, len(l.cols)) - if err != nil { - l.err = err - l.cursor.Close() - return false - } - l.resultList = append(l.resultList, s) - } - l.cursor.Close() - l.cursor = nil - if len(l.resultList) != 0 { - l.resultIndex = 0 - l.buildResult(0) - return true - } - return false + return false, false } -func (l *SQLLinkIterator) NextPath() bool { - l.resultIndex += 1 - if l.resultIndex >= len(l.resultList) { - return false - } - l.buildResult(l.resultIndex) - return true -} - -func (l *SQLLinkIterator) buildResult(i int) { - container := l.resultList[i] +func (l *SQLLinkIterator) buildResult(result []string, cols []string) map[string]string { var q quad.Quad - q.Subject = container[0] - q.Predicate = container[1] - q.Object = container[2] - q.Label = container[3] + q.Subject = result[0] + q.Predicate = result[1] + q.Object = result[2] + q.Label = result[3] l.resultQuad = q - l.result = make(map[string]string) - for i, c := range l.cols[4:] { - l.result[c] = container[i+4] + m := make(map[string]string) + for i, c := range cols[4:] { + m[c] = result[i+4] } + return m } func (l *SQLLinkIterator) getTables() []tableDef { @@ -448,119 +302,6 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string return query, values } -func convertToPostgres(query string, values []string) string { - for i := 1; i <= len(values); i++ { - query = 
strings.Replace(query, "?", fmt.Sprintf("$%d", i), 1) - } - return query -} - -func (l *SQLLinkIterator) makeCursor(next bool, value graph.Value) error { - if l.cursor != nil { - l.cursor.Close() - } - var q string - var values []string - q, values = l.buildSQL(next, value) - q = convertToPostgres(q, values) - ivalues := make([]interface{}, 0, len(values)) - for _, v := range values { - ivalues = append(ivalues, v) - } - cursor, err := l.qs.db.Query(q, ivalues...) - if err != nil { - glog.Errorf("Couldn't get cursor from SQL database: %v", err) - cursor = nil - return err - } - l.cursor = cursor - return nil -} - -func scan(cursor *sql.Rows, nCols int) ([]string, error) { - pointers := make([]interface{}, nCols) - container := make([]string, nCols) - for i, _ := range pointers { - pointers[i] = &container[i] - } - err := cursor.Scan(pointers...) - if err != nil { - glog.Errorf("Error scanning iterator: %v", err) - return nil, err - } - return container, nil -} - -func (l *SQLLinkIterator) Next() bool { - var err error - graph.NextLogIn(l) - if l.cursor == nil { - err = l.makeCursor(true, nil) - l.cols, err = l.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - l.err = err - l.cursor.Close() - return false - } - // iterate the first one - if !l.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := l.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - l.err = err - } - l.cursor.Close() - return false - } - s, err := scan(l.cursor, len(l.cols)) - if err != nil { - l.err = err - l.cursor.Close() - return false - } - l.resultNext = append(l.resultNext, s) - } - if l.resultList != nil && l.resultNext == nil { - // We're on something and there's no next - return false - } - l.resultList = l.resultNext - l.resultNext = nil - l.resultIndex = 0 - for { - if !l.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := l.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - l.err = err - } - 
l.cursor.Close() - break - } - s, err := scan(l.cursor, len(l.cols)) - if err != nil { - l.err = err - l.cursor.Close() - return false - } - if l.resultList[0][0] == s[0] && l.resultList[0][1] == s[1] && l.resultList[0][2] == s[2] && l.resultList[0][3] == s[3] { - l.resultList = append(l.resultList, s) - } else { - l.resultNext = append(l.resultNext, s) - break - } - - } - if len(l.resultList) == 0 { - return graph.NextLogOut(l, nil, false) - } - l.buildResult(0) - return graph.NextLogOut(l, l.Result(), true) -} - -type SQLAllIterator struct { - // TBD +func (l *SQLLinkIterator) sameTopResult(target []string, test []string) bool { + return target[0] == test[0] && target[1] == test[1] && target[2] == test[2] && target[3] == test[3] } diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 58efc9b..90e80e8 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -15,22 +15,25 @@ package sql import ( - "database/sql" "fmt" "strings" "sync/atomic" "github.com/barakmich/glog" "github.com/google/cayley/graph" - "github.com/google/cayley/graph/iterator" "github.com/google/cayley/quad" ) -var sqlNodeType graph.Type var sqlNodeTableID uint64 +type sqlQueryType int + +const ( + node sqlQueryType = iota + link +) + func init() { - sqlNodeType = graph.RegisterIterator("sqlnode") atomic.StoreUint64(&sqlNodeTableID, 0) } @@ -40,34 +43,20 @@ func newNodeTableName() string { } type SQLNodeIterator struct { - uid uint64 - qs *QuadStore - tagger graph.Tagger tableName string - err error - cursor *sql.Rows linkIts []sqlItDir nodetables []string size int64 + tagger graph.Tagger - result map[string]string - resultIndex int - resultList [][]string - resultNext [][]string - cols []string + result string } func (n *SQLNodeIterator) sqlClone() sqlIterator { - return n.Clone().(*SQLNodeIterator) -} - -func (n *SQLNodeIterator) Clone() graph.Iterator { m := &SQLNodeIterator{ - uid: iterator.NextUID(), - qs: n.qs, - size: n.size, 
tableName: n.tableName, + size: n.size, } for _, i := range n.linkIts { m.linkIts = append(m.linkIts, sqlItDir{ @@ -75,109 +64,39 @@ func (n *SQLNodeIterator) Clone() graph.Iterator { it: i.it.sqlClone(), }) } - m.tagger.CopyFrom(n) + m.tagger.CopyFromTagger(n.Tagger()) return m } -func (n *SQLNodeIterator) UID() uint64 { - return n.uid -} - -func (n *SQLNodeIterator) Reset() { - n.err = nil - n.Close() -} - -func (n *SQLNodeIterator) Err() error { - return n.err -} - -func (n *SQLNodeIterator) Close() error { - if n.cursor != nil { - err := n.cursor.Close() - if err != nil { - return err - } - n.cursor = nil - } - return nil -} - func (n *SQLNodeIterator) Tagger() *graph.Tagger { return &n.tagger } func (n *SQLNodeIterator) Result() graph.Value { - return n.result["__execd"] + return n.result } -func (n *SQLNodeIterator) TagResults(dst map[string]graph.Value) { - for tag, value := range n.result { - if tag == "__execd" { - for _, tag := range n.tagger.Tags() { - dst[tag] = value - } - continue +func (n *SQLNodeIterator) Type() sqlQueryType { + return node +} + +func (n *SQLNodeIterator) Size(qs *QuadStore) (int64, bool) { + return qs.Size() / int64(len(n.linkIts)+1), true +} + +func (n *SQLNodeIterator) Describe() string { + return fmt.Sprintf("SQL_NODE_QUERY: %#v", n) +} + +func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string { + m := make(map[string]string) + for i, c := range cols { + if c == "__execd" { + n.result = result[i] } - dst[tag] = value - } - - for tag, value := range n.tagger.Fixed() { - dst[tag] = value - } -} - -func (n *SQLNodeIterator) Type() graph.Type { - return sqlNodeType -} - -func (n *SQLNodeIterator) SubIterators() []graph.Iterator { - // TODO(barakmich): SQL Subiterators shouldn't count? If it makes sense, - // there's no reason not to expose them though. 
- return nil -} - -func (n *SQLNodeIterator) Sorted() bool { return false } -func (n *SQLNodeIterator) Optimize() (graph.Iterator, bool) { return n, false } - -func (n *SQLNodeIterator) Size() (int64, bool) { - return n.qs.Size() / int64(len(n.linkIts)+1), true -} - -func (n *SQLNodeIterator) Describe() graph.Description { - size, _ := n.Size() - return graph.Description{ - UID: n.UID(), - Name: fmt.Sprintf("SQL_NODE_QUERY: %#v", n), - Type: n.Type(), - Size: size, - } -} - -func (n *SQLNodeIterator) Stats() graph.IteratorStats { - size, _ := n.Size() - return graph.IteratorStats{ - ContainsCost: 1, - NextCost: 5, - Size: size, - } -} - -func (n *SQLNodeIterator) NextPath() bool { - n.resultIndex += 1 - if n.resultIndex >= len(n.resultList) { - return false - } - n.buildResult(n.resultIndex) - return true -} - -func (n *SQLNodeIterator) buildResult(i int) { - container := n.resultList[i] - n.result = make(map[string]string) - for i, c := range n.cols { - n.result[c] = container[i] + m[c] = result[i] } + return m } func (n *SQLNodeIterator) makeNodeTableNames() { @@ -215,7 +134,6 @@ func (n *SQLNodeIterator) buildSubqueries() []tableDef { // separate SQL iterators to build a similar tree as we're doing here, and // have a single graph.Iterator 'caddy' structure around it. 
subNode := &SQLNodeIterator{ - uid: iterator.NextUID(), tableName: newTableName(), linkIts: []sqlItDir{it}, } @@ -351,144 +269,8 @@ func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string return query, values } -func (n *SQLNodeIterator) Next() bool { - var err error - graph.NextLogIn(n) - if n.cursor == nil { - err = n.makeCursor(true, nil) - n.cols, err = n.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - n.err = err - n.cursor.Close() - return false - } - // iterate the first one - if !n.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := n.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - n.err = err - } - n.cursor.Close() - return false - } - s, err := scan(n.cursor, len(n.cols)) - if err != nil { - n.err = err - n.cursor.Close() - return false - } - n.resultNext = append(n.resultNext, s) - } - if n.resultList != nil && n.resultNext == nil { - // We're on something and there's no next - return false - } - n.resultList = n.resultNext - n.resultNext = nil - n.resultIndex = 0 - for { - if !n.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := n.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - n.err = err - } - n.cursor.Close() - break - } - s, err := scan(n.cursor, len(n.cols)) - if err != nil { - n.err = err - n.cursor.Close() - return false - } - if n.resultList[0][0] != s[0] { - n.resultNext = append(n.resultNext, s) - break - } else { - n.resultList = append(n.resultList, s) - } - - } - if len(n.resultList) == 0 { - return graph.NextLogOut(n, nil, false) - } - n.buildResult(0) - return graph.NextLogOut(n, n.Result(), true) +func (n *SQLNodeIterator) sameTopResult(target []string, test []string) bool { + return target[0] == test[0] } -func (n *SQLNodeIterator) makeCursor(next bool, value graph.Value) error { - if n.cursor != nil { - n.cursor.Close() - } - var q string - var values []string - q, values = n.buildSQL(next, value) - q = 
convertToPostgres(q, values) - ivalues := make([]interface{}, 0, len(values)) - for _, v := range values { - ivalues = append(ivalues, v) - } - cursor, err := n.qs.db.Query(q, ivalues...) - if err != nil { - glog.Errorf("Couldn't get cursor from SQL database: %v", err) - glog.Errorf("Query: %v", q) - cursor = nil - return err - } - n.cursor = cursor - return nil -} - -func (n *SQLNodeIterator) Contains(v graph.Value) bool { - var err error - //if it.preFilter(v) { - //return false - //} - err = n.makeCursor(false, v) - if err != nil { - glog.Errorf("Couldn't make query: %v", err) - n.err = err - n.cursor.Close() - return false - } - n.cols, err = n.cursor.Columns() - if err != nil { - glog.Errorf("Couldn't get columns") - n.err = err - n.cursor.Close() - return false - } - n.resultList = nil - for { - if !n.cursor.Next() { - glog.V(4).Infoln("sql: No next") - err := n.cursor.Err() - if err != nil { - glog.Errorf("Cursor error in SQL: %v", err) - n.err = err - } - n.cursor.Close() - break - } - s, err := scan(n.cursor, len(n.cols)) - if err != nil { - n.err = err - n.cursor.Close() - return false - } - n.resultList = append(n.resultList, s) - } - n.cursor.Close() - n.cursor = nil - if len(n.resultList) != 0 { - n.resultIndex = 0 - n.buildResult(0) - return true - } - return false -} +func (n *SQLNodeIterator) quickContains(_ graph.Value) (bool, bool) { return false, false } From 8830760df242cbd7d30060b338b8cb5e738a2e15 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Thu, 30 Jul 2015 18:40:48 -0400 Subject: [PATCH 10/18] Split intersection and node iteration into two logical SQL iterators --- graph/sql/optimizers.go | 33 ++++-- graph/sql/sql_node_intersection.go | 209 +++++++++++++++++++++++++++++++++++++ graph/sql/sql_node_iterator.go | 116 +++++--------------- 3 files changed, 259 insertions(+), 99 deletions(-) create mode 100644 graph/sql/sql_node_intersection.go diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index debf7a8..68e1e3b 100644 --- 
a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -28,6 +28,10 @@ func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error if bnew, ok := b.(*SQLNodeIterator); ok { return intersectNode(anew, bnew, qs) } + } else if anew, ok := a.(*SQLNodeIntersection); ok { + if bnew, ok := b.(*SQLNodeIterator); ok { + return appendNodeIntersection(anew, bnew, qs) + } } else if anew, ok := a.(*SQLLinkIterator); ok { if bnew, ok := b.(*SQLLinkIterator); ok { return intersectLink(anew, bnew, qs) @@ -40,9 +44,20 @@ func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error } func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator, qs *QuadStore) (*SQLIterator, error) { - m := &SQLNodeIterator{ + m := &SQLNodeIntersection{ tableName: newTableName(), - linkIts: append(a.linkIts, b.linkIts...), + nodeIts: []sqlIterator{a, b}, + } + m.Tagger().CopyFromTagger(a.Tagger()) + m.Tagger().CopyFromTagger(b.Tagger()) + it := NewSQLIterator(qs, m) + return it, nil +} + +func appendNodeIntersection(a *SQLNodeIntersection, b *SQLNodeIterator, qs *QuadStore) (*SQLIterator, error) { + m := &SQLNodeIntersection{ + tableName: newTableName(), + nodeIts: append(a.nodeIts, b), } m.Tagger().CopyFromTagger(a.Tagger()) m.Tagger().CopyFromTagger(b.Tagger()) @@ -71,11 +86,9 @@ func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, err out := &SQLNodeIterator{ tableName: newTableName(), - linkIts: []sqlItDir{ - sqlItDir{ - it: a, - dir: d, - }, + linkIt: sqlItDir{ + it: a, + dir: d, }, } it := NewSQLIterator(qs, out) @@ -83,9 +96,13 @@ func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, err } func linksto(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, error) { + var a sqlIterator a, ok := aIn.(*SQLNodeIterator) if !ok { - return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + a, ok = aIn.(*SQLNodeIntersection) + if !ok { + return nil, errors.New("Can't take the LINKSTO 
of a node SQL iterator") + } } out := &SQLLinkIterator{ diff --git a/graph/sql/sql_node_intersection.go b/graph/sql/sql_node_intersection.go new file mode 100644 index 0000000..d010d66 --- /dev/null +++ b/graph/sql/sql_node_intersection.go @@ -0,0 +1,209 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +type SQLNodeIntersection struct { + tableName string + + nodeIts []sqlIterator + nodetables []string + size int64 + tagger graph.Tagger + + result string +} + +func (n *SQLNodeIntersection) sqlClone() sqlIterator { + m := &SQLNodeIntersection{ + tableName: n.tableName, + size: n.size, + } + for _, i := range n.nodeIts { + m.nodeIts = append(m.nodeIts, i.sqlClone()) + } + m.tagger.CopyFromTagger(n.Tagger()) + return m +} + +func (n *SQLNodeIntersection) Tagger() *graph.Tagger { + return &n.tagger +} + +func (n *SQLNodeIntersection) Result() graph.Value { + return n.result +} + +func (n *SQLNodeIntersection) Type() sqlQueryType { + return nodeIntersect +} + +func (n *SQLNodeIntersection) Size(qs *QuadStore) (int64, bool) { + return qs.Size() / int64(len(n.nodeIts)+1), true +} + +func (n *SQLNodeIntersection) Describe() string { + return fmt.Sprintf("SQL_NODE_INTERSECTION: %#v", n) +} + +func (n *SQLNodeIntersection) buildResult(result []string, cols 
[]string) map[string]string { + m := make(map[string]string) + for i, c := range cols { + if c == "__execd" { + n.result = result[i] + } + m[c] = result[i] + } + return m +} + +func (n *SQLNodeIntersection) makeNodeTableNames() { + if n.nodetables != nil { + return + } + n.nodetables = make([]string, len(n.nodeIts)) + for i, _ := range n.nodetables { + n.nodetables[i] = newNodeTableName() + } +} + +func (n *SQLNodeIntersection) getTables() []tableDef { + if len(n.nodeIts) == 0 { + panic("Combined no subnode queries") + } + return n.buildSubqueries() +} + +func (n *SQLNodeIntersection) buildSubqueries() []tableDef { + var out []tableDef + n.makeNodeTableNames() + for i, it := range n.nodeIts { + var td tableDef + var table string + table, td.values = it.buildSQL(true, nil) + td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) + td.name = n.nodetables[i] + out = append(out, td) + } + return out +} + +func (n *SQLNodeIntersection) tableID() tagDir { + n.makeNodeTableNames() + return tagDir{ + table: n.nodetables[0], + dir: quad.Any, + tag: "__execd", + } +} + +func (n *SQLNodeIntersection) getLocalTags() []tagDir { + myTag := n.tableID() + var out []tagDir + for _, tag := range n.tagger.Tags() { + out = append(out, tagDir{ + dir: myTag.dir, + table: myTag.table, + tag: tag, + justLocal: true, + }) + } + return out +} + +func (n *SQLNodeIntersection) getTags() []tagDir { + out := n.getLocalTags() + n.makeNodeTableNames() + for i, it := range n.nodeIts { + for _, v := range it.getTags() { + out = append(out, tagDir{ + tag: v.tag, + dir: quad.Any, + table: n.nodetables[i], + }) + } + } + return out +} + +func (n *SQLNodeIntersection) buildWhere() (string, []string) { + var q []string + var vals []string + for _, tb := range n.nodetables[1:] { + q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) + } + query := strings.Join(q, " AND ") + return query, vals +} + +func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) 
{ + topData := n.tableID() + tags := []tagDir{topData} + tags = append(tags, n.getTags()...) + query := "SELECT DISTINCT " + var t []string + for _, v := range tags { + t = append(t, v.String()) + } + query += strings.Join(t, ", ") + query += " FROM " + t = []string{} + var values []string + for _, k := range n.getTables() { + values = append(values, k.values...) + t = append(t, fmt.Sprintf("%s as %s", k.table, k.name)) + } + query += strings.Join(t, ", ") + query += " WHERE " + + constraint, wherevalues := n.buildWhere() + values = append(values, wherevalues...) + + if !next { + v := val.(string) + if constraint != "" { + constraint += " AND " + } + constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir) + values = append(values, v) + } + query += constraint + query += ";" + + glog.V(2).Infoln(query) + + if glog.V(4) { + dstr := query + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, "?", fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } + return query, values +} + +func (n *SQLNodeIntersection) sameTopResult(target []string, test []string) bool { + return target[0] == test[0] +} + +func (n *SQLNodeIntersection) quickContains(_ graph.Value) (bool, bool) { return false, false } diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 90e80e8..3d9ec21 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -31,6 +31,7 @@ type sqlQueryType int const ( node sqlQueryType = iota link + nodeIntersect ) func init() { @@ -45,10 +46,9 @@ func newNodeTableName() string { type SQLNodeIterator struct { tableName string - linkIts []sqlItDir - nodetables []string - size int64 - tagger graph.Tagger + linkIt sqlItDir + size int64 + tagger graph.Tagger result string } @@ -57,12 +57,10 @@ func (n *SQLNodeIterator) sqlClone() sqlIterator { m := &SQLNodeIterator{ tableName: n.tableName, size: n.size, - } - for _, i := range n.linkIts { - m.linkIts = append(m.linkIts, sqlItDir{ - 
dir: i.dir, - it: i.it.sqlClone(), - }) + linkIt: sqlItDir{ + dir: n.linkIt.dir, + it: n.linkIt.it.sqlClone(), + }, } m.tagger.CopyFromTagger(n.Tagger()) return m @@ -81,7 +79,7 @@ func (n *SQLNodeIterator) Type() sqlQueryType { } func (n *SQLNodeIterator) Size(qs *QuadStore) (int64, bool) { - return qs.Size() / int64(len(n.linkIts)+1), true + return qs.Size() / 2, true } func (n *SQLNodeIterator) Describe() string { @@ -99,25 +97,10 @@ func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string return m } -func (n *SQLNodeIterator) makeNodeTableNames() { - if n.nodetables != nil { - return - } - n.nodetables = make([]string, len(n.linkIts)) - for i, _ := range n.nodetables { - n.nodetables[i] = newNodeTableName() - } -} - func (n *SQLNodeIterator) getTables() []tableDef { var out []tableDef - switch len(n.linkIts) { - case 0: - return []tableDef{tableDef{table: "quads", name: n.tableName}} - case 1: - out = n.linkIts[0].it.getTables() - default: - return n.buildSubqueries() + if n.linkIt.it != nil { + out = n.linkIt.it.getTables() } if len(out) == 0 { out = append(out, tableDef{table: "quads", name: n.tableName}) @@ -125,49 +108,19 @@ func (n *SQLNodeIterator) getTables() []tableDef { return out } -func (n *SQLNodeIterator) buildSubqueries() []tableDef { - var out []tableDef - n.makeNodeTableNames() - for i, it := range n.linkIts { - var td tableDef - // TODO(barakmich): This is a dirty hack. The real implementation is to - // separate SQL iterators to build a similar tree as we're doing here, and - // have a single graph.Iterator 'caddy' structure around it. 
- subNode := &SQLNodeIterator{ - tableName: newTableName(), - linkIts: []sqlItDir{it}, - } - var table string - table, td.values = subNode.buildSQL(true, nil) - td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) - td.name = n.nodetables[i] - out = append(out, td) - } - return out -} - func (n *SQLNodeIterator) tableID() tagDir { - switch len(n.linkIts) { - case 0: + if n.linkIt.it != nil { return tagDir{ - table: n.tableName, - dir: quad.Any, - tag: "__execd", - } - case 1: - return tagDir{ - table: n.linkIts[0].it.tableID().table, - dir: n.linkIts[0].dir, - tag: "__execd", - } - default: - n.makeNodeTableNames() - return tagDir{ - table: n.nodetables[0], - dir: quad.Any, + table: n.linkIt.it.tableID().table, + dir: n.linkIt.dir, tag: "__execd", } } + return tagDir{ + table: n.tableName, + dir: quad.Any, + tag: "__execd", + } } func (n *SQLNodeIterator) getLocalTags() []tagDir { @@ -186,21 +139,8 @@ func (n *SQLNodeIterator) getLocalTags() []tagDir { func (n *SQLNodeIterator) getTags() []tagDir { out := n.getLocalTags() - if len(n.linkIts) > 1 { - n.makeNodeTableNames() - for i, it := range n.linkIts { - for _, v := range it.it.getTags() { - out = append(out, tagDir{ - tag: v.tag, - dir: quad.Any, - table: n.nodetables[i], - }) - } - } - return out - } - for _, i := range n.linkIts { - out = append(out, i.it.getTags()...) + if n.linkIt.it != nil { + out = append(out, n.linkIt.it.getTags()...) } return out } @@ -208,16 +148,10 @@ func (n *SQLNodeIterator) getTags() []tagDir { func (n *SQLNodeIterator) buildWhere() (string, []string) { var q []string var vals []string - if len(n.linkIts) > 1 { - for _, tb := range n.nodetables[1:] { - q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) - } - } else { - for _, i := range n.linkIts { - s, v := i.it.buildWhere() - q = append(q, s) - vals = append(vals, v...) - } + if n.linkIt.it != nil { + s, v := n.linkIt.it.buildWhere() + q = append(q, s) + vals = append(vals, v...) 
} query := strings.Join(q, " AND ") return query, vals From c98318aa97ac45eaf4b78492ecbd88ddf2f8cf7e Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Fri, 31 Jul 2015 13:44:10 -0400 Subject: [PATCH 11/18] Fix tests and update Godep --- Godeps/Godeps.json | 13 ++++--- graph/sql/optimizers.go | 4 +-- graph/sql/optimizers_test.go | 70 +++++++++++++++++-------------------- graph/sql/sql_link_iterator_test.go | 42 +++++++++++----------- 4 files changed, 66 insertions(+), 63 deletions(-) diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index ddd8c93..35c2120 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -6,10 +6,6 @@ ], "Deps": [ { - "ImportPath": "github.com/pborman/uuid", - "Rev": "ca53cad383cad2479bbba7f7a1a05797ec1386e4" - }, - { "ImportPath": "github.com/badgerodon/peg", "Rev": "9e5f7f4d07ca576562618c23e8abadda278b684f" }, @@ -31,6 +27,15 @@ "Rev": "b59a38004596b696aca7aa2adccfa68760864d86" }, { + "ImportPath": "github.com/lib/pq", + "Comment": "go1.0-cutoff-58-g0dad96c", + "Rev": "0dad96c0b94f8dee039aa40467f767467392a0af" + }, + { + "ImportPath": "github.com/pborman/uuid", + "Rev": "ca53cad383cad2479bbba7f7a1a05797ec1386e4" + }, + { "ImportPath": "github.com/peterh/liner", "Rev": "1bb0d1c1a25ed393d8feb09bab039b2b1b1fbced" }, diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index 68e1e3b..f04543e 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -78,7 +78,7 @@ func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator, qs *QuadStore) (*SQLI return it, nil } -func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, error) { +func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (*SQLIterator, error) { a, ok := aIn.(*SQLLinkIterator) if !ok { return nil, errors.New("Can't take the HASA of a link SQL iterator") @@ -95,7 +95,7 @@ func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, err return it, nil } -func linksto(aIn sqlIterator, d quad.Direction, qs *QuadStore) 
(graph.Iterator, error) { +func linksto(aIn sqlIterator, d quad.Direction, qs *QuadStore) (*SQLIterator, error) { var a sqlIterator a, ok := aIn.(*SQLNodeIterator) if !ok { diff --git a/graph/sql/optimizers_test.go b/graph/sql/optimizers_test.go index 229b91e..8903bb7 100644 --- a/graph/sql/optimizers_test.go +++ b/graph/sql/optimizers_test.go @@ -15,7 +15,6 @@ package sql import ( - "fmt" "testing" "github.com/google/cayley/graph" @@ -25,108 +24,105 @@ import ( func TestBuildIntersect(t *testing.T) { a := NewSQLLinkIterator(nil, quad.Subject, "Foo") b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") - it, err := intersect(a, b) - i := it.(*SQLLinkIterator) + it, err := intersect(a.sql, b.sql, nil) if err != nil { t.Error(err) } - s, v := i.buildSQL(true, nil) - fmt.Println(s, v) + s, v := it.sql.buildSQL(true, nil) + t.Log(s, v) } func TestBuildHasa(t *testing.T) { a := NewSQLLinkIterator(nil, quad.Subject, "Foo") - a.tagger.Add("foo") + a.Tagger().Add("foo") b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") - it1, err := intersect(a, b) + it1, err := intersect(a.sql, b.sql, nil) if err != nil { t.Error(err) } - it2, err := hasa(it1, quad.Object) - i2 := it2.(*SQLNodeIterator) + it2, err := hasa(it1.sql, quad.Object, nil) if err != nil { t.Error(err) } - s, v := i2.buildSQL(true, nil) - fmt.Println(s, v) + s, v := it2.sql.buildSQL(true, nil) + t.Log(s, v) } func TestBuildLinksTo(t *testing.T) { a := NewSQLLinkIterator(nil, quad.Subject, "Foo") b := NewSQLLinkIterator(nil, quad.Predicate, "is_equivalent_to") - it1, err := intersect(a, b) + it1, err := intersect(a.sql, b.sql, nil) if err != nil { t.Error(err) } - it2, err := hasa(it1, quad.Object) + it2, err := hasa(it1.sql, quad.Object, nil) it2.Tagger().Add("foo") if err != nil { t.Error(err) } - it3, err := linksto(it2, quad.Subject) + it3, err := linksto(it2.sql, quad.Subject, nil) if err != nil { t.Error(err) } - i3 := it3.(*SQLLinkIterator) - s, v := i3.buildSQL(true, nil) - 
fmt.Println(s, v) + s, v := it3.sql.buildSQL(true, nil) + t.Log(s, v) } func TestInterestingQuery(t *testing.T) { - if *dbpath == "" { + if *postgres_path == "" { t.SkipNow() } - db, err := newQuadStore(*dbpath, nil) + db, err := newQuadStore(*postgres_path, nil) if err != nil { t.Fatal(err) } - a := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") - b := NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "name") - it1, err := intersect(a, b) + qs := db.(*QuadStore) + a := NewSQLLinkIterator(qs, quad.Object, "Humphrey Bogart") + b := NewSQLLinkIterator(qs, quad.Predicate, "name") + it1, err := intersect(a.sql, b.sql, qs) if err != nil { t.Error(err) } - it2, err := hasa(it1, quad.Subject) + it2, err := hasa(it1.sql, quad.Subject, qs) if err != nil { t.Error(err) } it2.Tagger().Add("hb") - it3, err := linksto(it2, quad.Object) + it3, err := linksto(it2.sql, quad.Object, qs) if err != nil { t.Error(err) } b = NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "/film/performance/actor") - it4, err := intersect(it3, b) + it4, err := intersect(it3.sql, b.sql, qs) if err != nil { t.Error(err) } - it5, err := hasa(it4, quad.Subject) + it5, err := hasa(it4.sql, quad.Subject, qs) if err != nil { t.Error(err) } - it6, err := linksto(it5, quad.Object) + it6, err := linksto(it5.sql, quad.Object, qs) if err != nil { t.Error(err) } b = NewSQLLinkIterator(db.(*QuadStore), quad.Predicate, "/film/film/starring") - it7, err := intersect(it6, b) + it7, err := intersect(it6.sql, b.sql, qs) if err != nil { t.Error(err) } - it8, err := hasa(it7, quad.Subject) + it8, err := hasa(it7.sql, quad.Subject, qs) if err != nil { t.Error(err) } - finalIt := it8.(*SQLNodeIterator) - s, v := finalIt.buildSQL(true, nil) - finalIt.Tagger().Add("id") - fmt.Println(s, v) - for graph.Next(finalIt) { - fmt.Println(finalIt.Result()) + s, v := it8.sql.buildSQL(true, nil) + it8.Tagger().Add("id") + t.Log(s, v) + for graph.Next(it8) { + t.Log(it8.Result()) out := 
make(map[string]graph.Value) - finalIt.TagResults(out) + it8.TagResults(out) for k, v := range out { - fmt.Printf("%s: %v\n", k, v.(string)) + t.Log("%s: %v\n", k, v.(string)) } } } diff --git a/graph/sql/sql_link_iterator_test.go b/graph/sql/sql_link_iterator_test.go index 5d66d2d..2eda766 100644 --- a/graph/sql/sql_link_iterator_test.go +++ b/graph/sql/sql_link_iterator_test.go @@ -24,29 +24,30 @@ import ( "github.com/google/cayley/quad" ) -var dbpath = flag.String("dbpath", "", "Path to running DB") +var postgres_path = flag.String("postgres_path", "", "Path to running DB") func TestSQLLink(t *testing.T) { it := NewSQLLinkIterator(nil, quad.Object, "cool") - s, v := it.buildSQL(true, nil) - fmt.Println(s, v) + s, v := it.sql.buildSQL(true, nil) + t.Log(s, v) } func TestSQLLinkIteration(t *testing.T) { - if *dbpath == "" { + if *postgres_path == "" { t.SkipNow() } - db, err := newQuadStore(*dbpath, nil) + db, err := newQuadStore(*postgres_path, nil) + qs := db.(*QuadStore) if err != nil { t.Fatal(err) } - it := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "Humphrey Bogart") + it := NewSQLLinkIterator(qs, quad.Object, "Humphrey Bogart") for graph.Next(it) { fmt.Println(it.Result()) } - it = NewSQLLinkIterator(db.(*QuadStore), quad.Subject, "/en/casablanca_1942") - s, v := it.buildSQL(true, nil) - fmt.Println(s, v) + it = NewSQLLinkIterator(qs, quad.Subject, "/en/casablanca_1942") + s, v := it.sql.buildSQL(true, nil) + t.Log(s, v) c := 0 for graph.Next(it) { fmt.Println(it.Result()) @@ -58,29 +59,30 @@ func TestSQLLinkIteration(t *testing.T) { } func TestSQLNodeIteration(t *testing.T) { - if *dbpath == "" { + if *postgres_path == "" { t.SkipNow() } - db, err := newQuadStore(*dbpath, nil) + db, err := newQuadStore(*postgres_path, nil) if err != nil { t.Fatal(err) } link := NewSQLLinkIterator(db.(*QuadStore), quad.Object, "/en/humphrey_bogart") - it := &SQLNodeIterator{ - uid: iterator.NextUID(), - qs: db.(*QuadStore), - tableName: newTableName(), - linkIts: 
[]sqlItDir{ - sqlItDir{it: link, + it := &SQLIterator{ + uid: iterator.NextUID(), + qs: db.(*QuadStore), + sql: &SQLNodeIterator{ + tableName: newTableName(), + linkIt: sqlItDir{ + it: link.sql, dir: quad.Subject, }, }, } - s, v := it.buildSQL(true, nil) - fmt.Println(s, v) + s, v := it.sql.buildSQL(true, nil) + t.Log(s, v) c := 0 for graph.Next(it) { - fmt.Println(it.Result()) + t.Log(it.Result()) c += 1 } if c != 56 { From ab3f59d21fed1ade9e70e83b385b94c0d0ebf725 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Fri, 7 Aug 2015 14:35:24 -0400 Subject: [PATCH 12/18] Add hash-based indexes --- graph/sql/quadstore.go | 71 ++++++++++++++++++++++++++++++-------- graph/sql/sql_iterator.go | 2 +- graph/sql/sql_link_iterator.go | 44 ++++++++++++----------- graph/sql/sql_node_intersection.go | 13 ++++--- graph/sql/sql_node_iterator.go | 9 +++-- 5 files changed, 95 insertions(+), 44 deletions(-) diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 3181f2b..4c708cb 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -1,8 +1,12 @@ package sql import ( + "crypto/sha1" "database/sql" + "encoding/hex" "fmt" + "hash" + "sync" "github.com/lib/pq" @@ -18,6 +22,13 @@ func init() { graph.RegisterQuadStore(QuadStoreType, true, newQuadStore, createSQLTables, nil) } +var ( + hashPool = sync.Pool{ + New: func() interface{} { return sha1.New() }, + } + hashSize = sha1.Size +) + type QuadStore struct { db *sql.DB sqlFlavor string @@ -55,7 +66,11 @@ func createSQLTables(addr string, options graph.Options) error { horizon BIGSERIAL PRIMARY KEY, id BIGINT, ts timestamp, - UNIQUE(subject, predicate, object, label) + subject_hash TEXT NOT NULL, + predicate_hash TEXT NOT NULL, + object_hash TEXT NOT NULL, + label_hash TEXT, + UNIQUE(subject_hash, predicate_hash, object_hash, label_hash) );`) if err != nil { glog.Errorf("Cannot create quad table: %v", quadTable) @@ -73,17 +88,11 @@ func createSQLTables(addr string, options graph.Options) error { CREATE INDEX 
pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32); CREATE INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32); `) - } else if idxStrat == "prefix" { - index, err = tx.Exec(fmt.Sprintf(` - CREATE INDEX spo_index ON quads (substr(subject, 0, 8)) WITH (FILLFACTOR = %d); - CREATE INDEX pos_index ON quads (substr(predicate, 0, 8)) WITH (FILLFACTOR = %d); - CREATE INDEX osp_index ON quads (substr(object, 0, 8)) WITH (FILLFACTOR = %d); - `, factor, factor, factor)) } else { index, err = tx.Exec(fmt.Sprintf(` - CREATE INDEX spo_index ON quads (subject, predicate, object) WITH (FILLFACTOR = %d); - CREATE INDEX pos_index ON quads (predicate, object, subject) WITH (FILLFACTOR = %d); - CREATE INDEX osp_index ON quads (object, subject, predicate) WITH (FILLFACTOR = %d); + CREATE INDEX spo_index ON quads (subject_hash) WITH (FILLFACTOR = %d); + CREATE INDEX pos_index ON quads (predicate_hash) WITH (FILLFACTOR = %d); + CREATE INDEX osp_index ON quads (object_hash) WITH (FILLFACTOR = %d); `, factor, factor, factor)) } if err != nil { @@ -107,13 +116,34 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) { return &qs, nil } +func hashOf(s string) string { + h := hashPool.Get().(hash.Hash) + h.Reset() + defer hashPool.Put(h) + key := make([]byte, 0, hashSize) + h.Write([]byte(s)) + key = h.Sum(key) + return hex.EncodeToString(key) +} + func (qs *QuadStore) copyFrom(tx *sql.Tx, in []graph.Delta) error { - stmt, err := tx.Prepare(pq.CopyIn("quads", "subject", "predicate", "object", "label", "id", "ts")) + stmt, err := tx.Prepare(pq.CopyIn("quads", "subject", "predicate", "object", "label", "id", "ts", "subject_hash", "predicate_hash", "object_hash", "label_hash")) if err != nil { return err } for _, d := range in { - _, err := stmt.Exec(d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp) + _, err := stmt.Exec( + d.Quad.Subject, + d.Quad.Predicate, + d.Quad.Object, + d.Quad.Label, + 
d.ID.Int(), + d.Timestamp, + hashOf(d.Quad.Subject), + hashOf(d.Quad.Predicate), + hashOf(d.Quad.Object), + hashOf(d.Quad.Label), + ) if err != nil { glog.Errorf("couldn't prepare COPY statement: %v", err) return err @@ -137,7 +167,7 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { return qs.copyFrom(tx, in) } - insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts) VALUES ($1, $2, $3, $4, $5, $6)`) + insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts, subject_hash, predicate_hash, object_hash, label_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`) if err != nil { glog.Errorf("Cannot prepare insert statement: %v", err) return err @@ -145,7 +175,18 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { for _, d := range in { switch d.Action { case graph.Add: - _, err := insert.Exec(d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp) + _, err := insert.Exec( + d.Quad.Subject, + d.Quad.Predicate, + d.Quad.Object, + d.Quad.Label, + d.ID.Int(), + d.Timestamp, + hashOf(d.Quad.Subject), + hashOf(d.Quad.Predicate), + hashOf(d.Quad.Object), + hashOf(d.Quad.Label), + ) if err != nil { glog.Errorf("couldn't prepare INSERT statement: %v", err) return err @@ -271,7 +312,7 @@ func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) var size int64 glog.V(4).Infoln("sql: getting size for select %s, %s", dir.String(), val) err = qs.db.QueryRow( - fmt.Sprintf("SELECT count(*) FROM quads WHERE %s = $1;", dir.String()), val).Scan(&size) + fmt.Sprintf("SELECT count(*) FROM quads WHERE %s_hash = $1;", dir.String()), hashOf(val)).Scan(&size) if err != nil { glog.Errorln("Error getting size from SQL database: %v", err) return 0 diff --git a/graph/sql/sql_iterator.go b/graph/sql/sql_iterator.go index 74ca0c2..3f2d758 100644 --- a/graph/sql/sql_iterator.go +++ b/graph/sql/sql_iterator.go @@ -290,7 +290,7 
@@ func (it *SQLIterator) makeCursor(next bool, value graph.Value) error { } var q string var values []string - q, values = it.sql.buildSQL(next, value) + q, values = it.sql.buildSQL(next, value, false) q = convertToPostgres(q, values) ivalues := make([]interface{}, 0, len(values)) for _, v := range values { diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index 8e7a805..fc6d02e 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -50,11 +50,11 @@ type tagDir struct { func (t tagDir) String() string { if t.dir == quad.Any { if t.justLocal { - return fmt.Sprintf("%s.__execd as %s", t.table, t.tag) + return fmt.Sprintf("%s.__execd as %s, %s.__execd_hash as %s_hash", t.table, t.tag, t.table, t.tag) } - return fmt.Sprintf("%s.%s as %s", t.table, t.tag, t.tag) + return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.tag, t.tag, t.table, t.tag, t.tag) } - return fmt.Sprintf("%s.%s as %s", t.table, t.dir, t.tag) + return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.dir, t.tag, t.table, t.dir, t.tag) } type tableDef struct { @@ -71,7 +71,7 @@ type sqlItDir struct { type sqlIterator interface { sqlClone() sqlIterator - buildSQL(next bool, val graph.Value) (string, []string) + buildSQL(next bool, val graph.Value, hash bool) (string, []string) getTables() []tableDef getTags() []tagDir buildWhere() (string, []string) @@ -219,8 +219,8 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) { var q []string var vals []string for _, c := range l.constraints { - q = append(q, fmt.Sprintf("%s.%s = ?", l.tableName, c.dir)) - vals = append(vals, c.vals[0]) + q = append(q, fmt.Sprintf("%s.%s_hash = ?", l.tableName, c.dir)) + vals = append(vals, hashOf(c.vals[0])) } for _, i := range l.nodeIts { t := i.it.tableID() @@ -228,7 +228,7 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) { if t.dir == quad.Any { dir = t.tag } - q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, 
t.table, dir)) + q = append(q, fmt.Sprintf("%s.%s_hash = %s.%s_hash", l.tableName, i.dir, t.table, dir)) } for _, i := range l.nodeIts { s, v := i.it.buildWhere() @@ -246,13 +246,17 @@ func (l *SQLLinkIterator) tableID() tagDir { } } -func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) { +func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, hash bool) (string, []string) { query := "SELECT DISTINCT " + hashs := "" + if hash { + hashs = "_hash" + } t := []string{ - fmt.Sprintf("%s.subject", l.tableName), - fmt.Sprintf("%s.predicate", l.tableName), - fmt.Sprintf("%s.object", l.tableName), - fmt.Sprintf("%s.label", l.tableName), + fmt.Sprintf("%s.subject%s", l.tableName, hashs), + fmt.Sprintf("%s.predicate%s", l.tableName, hashs), + fmt.Sprintf("%s.object%s", l.tableName, hashs), + fmt.Sprintf("%s.label%s", l.tableName, hashs), } for _, v := range l.getTags() { t = append(t, v.String()) @@ -276,16 +280,16 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string constraint += " AND " } t = []string{ - fmt.Sprintf("%s.subject = ?", l.tableName), - fmt.Sprintf("%s.predicate = ?", l.tableName), - fmt.Sprintf("%s.object = ?", l.tableName), - fmt.Sprintf("%s.label = ?", l.tableName), + fmt.Sprintf("%s.subject_hash = ?", l.tableName), + fmt.Sprintf("%s.predicate_hash = ?", l.tableName), + fmt.Sprintf("%s.object_hash = ?", l.tableName), + fmt.Sprintf("%s.label_hash = ?", l.tableName), } constraint += strings.Join(t, " AND ") - values = append(values, v.Subject) - values = append(values, v.Predicate) - values = append(values, v.Object) - values = append(values, v.Label) + values = append(values, hashOf(v.Subject)) + values = append(values, hashOf(v.Predicate)) + values = append(values, hashOf(v.Object)) + values = append(values, hashOf(v.Label)) } query += constraint query += ";" diff --git a/graph/sql/sql_node_intersection.go b/graph/sql/sql_node_intersection.go index d010d66..807df72 100644 --- 
a/graph/sql/sql_node_intersection.go +++ b/graph/sql/sql_node_intersection.go @@ -69,6 +69,9 @@ func (n *SQLNodeIntersection) Describe() string { func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string { m := make(map[string]string) for i, c := range cols { + if strings.HasSuffix(c, "_hash") { + continue + } if c == "__execd" { n.result = result[i] } @@ -100,7 +103,7 @@ func (n *SQLNodeIntersection) buildSubqueries() []tableDef { for i, it := range n.nodeIts { var td tableDef var table string - table, td.values = it.buildSQL(true, nil) + table, td.values = it.buildSQL(true, nil, true) td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) td.name = n.nodetables[i] out = append(out, td) @@ -150,13 +153,13 @@ func (n *SQLNodeIntersection) buildWhere() (string, []string) { var q []string var vals []string for _, tb := range n.nodetables[1:] { - q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) + q = append(q, fmt.Sprintf("%s.__execd_hash = %s.__execd_hash", n.nodetables[0], tb)) } query := strings.Join(q, " AND ") return query, vals } -func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) { +func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, _ bool) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) 
@@ -184,8 +187,8 @@ func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []st if constraint != "" { constraint += " AND " } - constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir) - values = append(values, v) + constraint += fmt.Sprintf("%s.%s_hash = ?", topData.table, topData.dir) + values = append(values, hashOf(v)) } query += constraint query += ";" diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 3d9ec21..55eb211 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -89,6 +89,9 @@ func (n *SQLNodeIterator) Describe() string { func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string { m := make(map[string]string) for i, c := range cols { + if strings.HasSuffix(c, "_hash") { + continue + } if c == "__execd" { n.result = result[i] } @@ -157,7 +160,7 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) { return query, vals } -func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) { +func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, _ bool) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) 
@@ -185,8 +188,8 @@ func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string if constraint != "" { constraint += " AND " } - constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir) - values = append(values, v) + constraint += fmt.Sprintf("%s.%s_hash = ?", topData.table, topData.dir) + values = append(values, hashOf(v)) } query += constraint query += ";" From aedd0401e29c27cd19a8f84d67fa5a85e08f9a6d Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Mon, 10 Aug 2015 15:41:22 -0400 Subject: [PATCH 13/18] Fix up hash interface and speed up save queries 10x Fix all optimizer --- graph/sql/optimizers.go | 3 ++- graph/sql/sql_iterator.go | 2 +- graph/sql/sql_link_iterator.go | 17 ++++++++++++----- graph/sql/sql_node_intersection.go | 9 ++++++--- graph/sql/sql_node_iterator.go | 7 +++++-- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index f04543e..c46c600 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -164,7 +164,8 @@ func (qs *QuadStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bool return newit, true case graph.All: linkit := &SQLLinkIterator{ - size: qs.Size(), + tableName: newTableName(), + size: qs.Size(), } for _, t := range primary.Tagger().Tags() { linkit.tagdirs = append(linkit.tagdirs, tagDir{ diff --git a/graph/sql/sql_iterator.go b/graph/sql/sql_iterator.go index 3f2d758..35a062f 100644 --- a/graph/sql/sql_iterator.go +++ b/graph/sql/sql_iterator.go @@ -290,7 +290,7 @@ func (it *SQLIterator) makeCursor(next bool, value graph.Value) error { } var q string var values []string - q, values = it.sql.buildSQL(next, value, false) + q, values = it.sql.buildSQL(next, value, true) q = convertToPostgres(q, values) ivalues := make([]interface{}, 0, len(values)) for _, v := range values { diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index fc6d02e..338ce00 100644 --- a/graph/sql/sql_link_iterator.go +++ 
b/graph/sql/sql_link_iterator.go @@ -71,7 +71,7 @@ type sqlItDir struct { type sqlIterator interface { sqlClone() sqlIterator - buildSQL(next bool, val graph.Value, hash bool) (string, []string) + buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) getTables() []tableDef getTags() []tagDir buildWhere() (string, []string) @@ -246,10 +246,13 @@ func (l *SQLLinkIterator) tableID() tagDir { } } -func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, hash bool) (string, []string) { - query := "SELECT DISTINCT " +func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { + query := "SELECT " + if topLevel { + query += "DISTINCT " + } hashs := "" - if hash { + if !topLevel { hashs = "_hash" } t := []string{ @@ -270,14 +273,18 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, hash bool) (strin t = append(t, fmt.Sprintf("%s as %s", k.table, k.name)) } query += strings.Join(t, ", ") - query += " WHERE " constraint, wherevalues := l.buildWhere() + if constraint != "" { + query += " WHERE " + } values = append(values, wherevalues...) 
if !next { v := val.(quad.Quad) if constraint != "" { constraint += " AND " + } else { + constraint += " WHERE " } t = []string{ fmt.Sprintf("%s.subject_hash = ?", l.tableName), diff --git a/graph/sql/sql_node_intersection.go b/graph/sql/sql_node_intersection.go index 807df72..9c0434e 100644 --- a/graph/sql/sql_node_intersection.go +++ b/graph/sql/sql_node_intersection.go @@ -103,7 +103,7 @@ func (n *SQLNodeIntersection) buildSubqueries() []tableDef { for i, it := range n.nodeIts { var td tableDef var table string - table, td.values = it.buildSQL(true, nil, true) + table, td.values = it.buildSQL(true, nil, false) td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) td.name = n.nodetables[i] out = append(out, td) @@ -159,11 +159,14 @@ func (n *SQLNodeIntersection) buildWhere() (string, []string) { return query, vals } -func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, _ bool) (string, []string) { +func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) - query := "SELECT DISTINCT " + query := "SELECT " + if topLevel { + query += "DISTINCT " + } var t []string for _, v := range tags { t = append(t, v.String()) diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 55eb211..abe89f4 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -160,11 +160,14 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) { return query, vals } -func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, _ bool) (string, []string) { +func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) 
- query := "SELECT DISTINCT " + query := "SELECT " + if topLevel { + query += "DISTINCT " + } var t []string for _, v := range tags { t = append(t, v.String()) From fab8cd64b3addd6a38bcd588325d7d45a2e20465 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Mon, 10 Aug 2015 16:28:16 -0400 Subject: [PATCH 14/18] Option to remove size calls Fix permutations of optimization intersections Return empty string as per bolt fix case sensitivity and memstore panic --- graph/memstore/quadstore.go | 3 +++ graph/sql/optimizers.go | 17 +++++++++++++++++ graph/sql/quadstore.go | 11 +++++++++++ graph/sql/sql_link_iterator.go | 6 +++--- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/graph/memstore/quadstore.go b/graph/memstore/quadstore.go index 5605437..a2db9db 100644 --- a/graph/memstore/quadstore.go +++ b/graph/memstore/quadstore.go @@ -249,6 +249,9 @@ func (qs *QuadStore) ValueOf(name string) graph.Value { } func (qs *QuadStore) NameOf(id graph.Value) string { + if id == nil { + return "" + } return qs.revIDMap[id.(int64)] } diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index c46c600..538ddd7 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -28,10 +28,16 @@ func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error if bnew, ok := b.(*SQLNodeIterator); ok { return intersectNode(anew, bnew, qs) } + if bnew, ok := b.(*SQLNodeIntersection); ok { + return appendNodeIntersection(bnew, anew, qs) + } } else if anew, ok := a.(*SQLNodeIntersection); ok { if bnew, ok := b.(*SQLNodeIterator); ok { return appendNodeIntersection(anew, bnew, qs) } + if bnew, ok := b.(*SQLNodeIntersection); ok { + return combineNodeIntersection(anew, bnew, qs) + } } else if anew, ok := a.(*SQLLinkIterator); ok { if bnew, ok := b.(*SQLLinkIterator); ok { return intersectLink(anew, bnew, qs) @@ -65,6 +71,17 @@ func appendNodeIntersection(a *SQLNodeIntersection, b *SQLNodeIterator, qs *Quad return it, nil } +func 
combineNodeIntersection(a *SQLNodeIntersection, b *SQLNodeIntersection, qs *QuadStore) (*SQLIterator, error) { + m := &SQLNodeIntersection{ + tableName: newTableName(), + nodeIts: append(a.nodeIts, b.nodeIts...), + } + m.Tagger().CopyFromTagger(a.Tagger()) + m.Tagger().CopyFromTagger(b.Tagger()) + it := NewSQLIterator(qs, m) + return it, nil +} + func intersectLink(a *SQLLinkIterator, b *SQLLinkIterator, qs *QuadStore) (*SQLIterator, error) { m := &SQLLinkIterator{ tableName: newTableName(), diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 4c708cb..61dfbb9 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -34,6 +34,7 @@ type QuadStore struct { sqlFlavor string size int64 lru *cache + noSizes bool } func connectSQLTables(addr string, _ graph.Options) (*sql.DB, error) { @@ -257,6 +258,10 @@ func (qs *QuadStore) ValueOf(s string) graph.Value { } func (qs *QuadStore) NameOf(v graph.Value) string { + if v == nil { + glog.V(2).Info("NameOf was nil") + return "" + } return v.(string) } @@ -306,6 +311,12 @@ func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string) if isAll { return qs.Size() } + if qs.noSizes { + if dir == quad.Predicate { + return (qs.Size() / 100) + 1 + } + return (qs.Size() / 1000) + 1 + } if val, ok := qs.lru.Get(val + string(dir.Prefix())); ok { return val } diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index 338ce00..dea3243 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -50,11 +50,11 @@ type tagDir struct { func (t tagDir) String() string { if t.dir == quad.Any { if t.justLocal { - return fmt.Sprintf("%s.__execd as %s, %s.__execd_hash as %s_hash", t.table, t.tag, t.table, t.tag) + return fmt.Sprintf("%s.__execd as \"%s\", %s.__execd_hash as %s_hash", t.table, t.tag, t.table, t.tag) } - return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.tag, t.tag, t.table, t.tag, t.tag) + return fmt.Sprintf("%s.\"%s\" as 
\"%s\", %s.%s_hash as %s_hash", t.table, t.tag, t.tag, t.table, t.tag, t.tag) } - return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.dir, t.tag, t.table, t.dir, t.tag) + return fmt.Sprintf("%s.%s as \"%s\", %s.%s_hash as %s_hash", t.table, t.dir, t.tag, t.table, t.dir, t.tag) } type tableDef struct { From ebaf8b2a9869a1277a5aa1f1173a6d3f35489366 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 12 Aug 2015 14:40:02 -0400 Subject: [PATCH 15/18] Can't use distinct, fix double-hashing bug remove distinctness, make printing the SQL query part of describe clean up quadstore a bit, add noSizes as an external option --- graph/sql/quadstore.go | 37 ++++++++++++++----------------------- graph/sql/sql_iterator.go | 2 +- graph/sql/sql_link_iterator.go | 24 ++++++++---------------- graph/sql/sql_node_intersection.go | 12 ++++-------- graph/sql/sql_node_iterator.go | 11 ++++------- 5 files changed, 31 insertions(+), 55 deletions(-) diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 61dfbb9..65b25fe 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -77,25 +77,17 @@ func createSQLTables(addr string, options graph.Options) error { glog.Errorf("Cannot create quad table: %v", quadTable) return err } - idxStrat, _, err := options.StringKey("db_index_strategy") factor, factorOk, err := options.IntKey("db_fill_factor") if !factorOk { factor = 50 } var index sql.Result - if idxStrat == "brin" { - index, err = tx.Exec(` - CREATE INDEX spo_index ON quads USING brin(subject) WITH (pages_per_range = 32); - CREATE INDEX pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32); - CREATE INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32); - `) - } else { - index, err = tx.Exec(fmt.Sprintf(` + + index, err = tx.Exec(fmt.Sprintf(` CREATE INDEX spo_index ON quads (subject_hash) WITH (FILLFACTOR = %d); CREATE INDEX pos_index ON quads (predicate_hash) WITH (FILLFACTOR = %d); CREATE INDEX osp_index ON quads 
(object_hash) WITH (FILLFACTOR = %d); `, factor, factor, factor)) - } if err != nil { glog.Errorf("Cannot create indices: %v", index) return err @@ -110,10 +102,22 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) { if err != nil { return nil, err } + localOpt, localOptOk, err := options.BoolKey("local_optimize") + if err != nil { + return nil, err + } qs.db = conn qs.sqlFlavor = "postgres" qs.size = -1 qs.lru = newCache(1024) + + // Skip size checking by default. + qs.noSizes = true + if localOptOk { + if localOpt { + qs.noSizes = false + } + } return &qs, nil } @@ -192,25 +196,12 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { glog.Errorf("couldn't prepare INSERT statement: %v", err) return err } - //for _, dir := range quad.Directions { - //_, err := tx.Exec(` - //WITH upsert AS (UPDATE nodes SET size=size+1 WHERE node=$1 RETURNING *) - //INSERT INTO nodes (node, size) SELECT $1, 1 WHERE NOT EXISTS (SELECT * FROM UPSERT); - //`, d.Quad.Get(dir)) - //if err != nil { - //glog.Errorf("couldn't prepare upsert statement in direction %s: %v", dir, err) - //return err - //} - //} case graph.Delete: _, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`, d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label) if err != nil { glog.Errorf("couldn't prepare DELETE statement: %v", err) } - //for _, dir := range quad.Directions { - //tx.Exec(`UPDATE nodes SET size=size-1 WHERE node=$1;`, d.Quad.Get(dir)) - //} default: panic("unknown action") } diff --git a/graph/sql/sql_iterator.go b/graph/sql/sql_iterator.go index 35a062f..74ca0c2 100644 --- a/graph/sql/sql_iterator.go +++ b/graph/sql/sql_iterator.go @@ -290,7 +290,7 @@ func (it *SQLIterator) makeCursor(next bool, value graph.Value) error { } var q string var values []string - q, values = it.sql.buildSQL(next, value, true) + q, values = it.sql.buildSQL(next, value) q = convertToPostgres(q, values) ivalues := 
make([]interface{}, 0, len(values)) for _, v := range values { diff --git a/graph/sql/sql_link_iterator.go b/graph/sql/sql_link_iterator.go index dea3243..f2750c3 100644 --- a/graph/sql/sql_link_iterator.go +++ b/graph/sql/sql_link_iterator.go @@ -71,7 +71,7 @@ type sqlItDir struct { type sqlIterator interface { sqlClone() sqlIterator - buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) + buildSQL(next bool, val graph.Value) (string, []string) getTables() []tableDef getTags() []tagDir buildWhere() (string, []string) @@ -143,7 +143,8 @@ func (l *SQLLinkIterator) Size(qs *QuadStore) (int64, bool) { } func (l *SQLLinkIterator) Describe() string { - return fmt.Sprintf("SQL_LINK_QUERY: %#v", l) + s, _ := l.buildSQL(true, nil) + return fmt.Sprintf("SQL_LINK_QUERY: %s", s) } func (l *SQLLinkIterator) Type() sqlQueryType { @@ -246,20 +247,13 @@ func (l *SQLLinkIterator) tableID() tagDir { } } -func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { +func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) { query := "SELECT " - if topLevel { - query += "DISTINCT " - } - hashs := "" - if !topLevel { - hashs = "_hash" - } t := []string{ - fmt.Sprintf("%s.subject%s", l.tableName, hashs), - fmt.Sprintf("%s.predicate%s", l.tableName, hashs), - fmt.Sprintf("%s.object%s", l.tableName, hashs), - fmt.Sprintf("%s.label%s", l.tableName, hashs), + fmt.Sprintf("%s.subject", l.tableName), + fmt.Sprintf("%s.predicate", l.tableName), + fmt.Sprintf("%s.object", l.tableName), + fmt.Sprintf("%s.label", l.tableName), } for _, v := range l.getTags() { t = append(t, v.String()) @@ -301,8 +295,6 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, topLevel bool) (s query += constraint query += ";" - glog.V(2).Infoln(query) - if glog.V(4) { dstr := query for i := 1; i <= len(values); i++ { diff --git a/graph/sql/sql_node_intersection.go b/graph/sql/sql_node_intersection.go index 9c0434e..a9e26e2 
100644 --- a/graph/sql/sql_node_intersection.go +++ b/graph/sql/sql_node_intersection.go @@ -63,7 +63,8 @@ func (n *SQLNodeIntersection) Size(qs *QuadStore) (int64, bool) { } func (n *SQLNodeIntersection) Describe() string { - return fmt.Sprintf("SQL_NODE_INTERSECTION: %#v", n) + s, _ := n.buildSQL(true, nil) + return fmt.Sprintf("SQL_NODE_INTERSECTION: %s", s) } func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string { @@ -103,7 +104,7 @@ func (n *SQLNodeIntersection) buildSubqueries() []tableDef { for i, it := range n.nodeIts { var td tableDef var table string - table, td.values = it.buildSQL(true, nil, false) + table, td.values = it.buildSQL(true, nil) td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) td.name = n.nodetables[i] out = append(out, td) @@ -159,14 +160,11 @@ func (n *SQLNodeIntersection) buildWhere() (string, []string) { return query, vals } -func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { +func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) 
query := "SELECT " - if topLevel { - query += "DISTINCT " - } var t []string for _, v := range tags { t = append(t, v.String()) @@ -196,8 +194,6 @@ func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, topLevel bool query += constraint query += ";" - glog.V(2).Infoln(query) - if glog.V(4) { dstr := query for i := 1; i <= len(values); i++ { diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index abe89f4..811121a 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -83,7 +83,8 @@ func (n *SQLNodeIterator) Size(qs *QuadStore) (int64, bool) { } func (n *SQLNodeIterator) Describe() string { - return fmt.Sprintf("SQL_NODE_QUERY: %#v", n) + s, _ := n.buildSQL(true, nil) + return fmt.Sprintf("SQL_NODE_QUERY: %s", s) } func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string { @@ -160,14 +161,12 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) { return query, vals } -func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { +func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) { topData := n.tableID() tags := []tagDir{topData} tags = append(tags, n.getTags()...) 
query := "SELECT " - if topLevel { - query += "DISTINCT " - } + var t []string for _, v := range tags { t = append(t, v.String()) @@ -197,8 +196,6 @@ func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, topLevel bool) (s query += constraint query += ";" - glog.V(2).Infoln(query) - if glog.V(4) { dstr := query for i := 1; i <= len(values); i++ { From 7ddeb101ca2d6ee60b14596e6247ea5f67a40f78 Mon Sep 17 00:00:00 2001 From: Quentin Machu Date: Wed, 26 Aug 2015 01:19:28 -0400 Subject: [PATCH 16/18] Fix NPEs in SQL Next and Contains --- graph/sql/all_iterator.go | 3 +++ graph/sql/sql_iterator.go | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/graph/sql/all_iterator.go b/graph/sql/all_iterator.go index 05f94b3..c91c65d 100644 --- a/graph/sql/all_iterator.go +++ b/graph/sql/all_iterator.go @@ -131,6 +131,9 @@ func (it *AllIterator) Next() bool { graph.NextLogIn(it) if it.cursor == nil { it.makeCursor() + if it.cursor == nil { + return false + } } if !it.cursor.Next() { glog.V(4).Infoln("sql: No next") diff --git a/graph/sql/sql_iterator.go b/graph/sql/sql_iterator.go index 74ca0c2..79df549 100644 --- a/graph/sql/sql_iterator.go +++ b/graph/sql/sql_iterator.go @@ -151,6 +151,11 @@ func (it *SQLIterator) Next() bool { graph.NextLogIn(it) if it.cursor == nil { err = it.makeCursor(true, nil) + if err != nil { + glog.Errorf("Couldn't make query: %v", err) + it.err = err + return false + } it.cols, err = it.cursor.Columns() if err != nil { glog.Errorf("Couldn't get columns") @@ -226,7 +231,9 @@ func (it *SQLIterator) Contains(v graph.Value) bool { if err != nil { glog.Errorf("Couldn't make query: %v", err) it.err = err - it.cursor.Close() + if it.cursor != nil { + it.cursor.Close() + } return false } it.cols, err = it.cursor.Columns() From 13b11e46a4e2fac36f86b697b8bed9fab65cca58 Mon Sep 17 00:00:00 2001 From: Quentin Machu Date: Wed, 26 Aug 2015 02:10:27 -0400 Subject: [PATCH 17/18] Make the SQL connection fail-fast --- 
graph/sql/quadstore.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index 65b25fe..d7f3a87 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -44,6 +44,13 @@ func connectSQLTables(addr string, _ graph.Options) (*sql.DB, error) { glog.Errorf("Couldn't open database at %s: %#v", addr, err) return nil, err } + // "Open may just validate its arguments without creating a connection to the database." + // "To verify that the data source name is valid, call Ping." + // Source: http://golang.org/pkg/database/sql/#Open + if err := conn.Ping(); err != nil { + glog.Errorf("Couldn't open database at %s: %#v", addr, err) + return nil, err + } return conn, nil } From fb7e200551378fc4cee1822f9ef4392dc7473cc9 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 2 Sep 2015 16:25:29 -0400 Subject: [PATCH 18/18] Respect IgnoreMissing, which SQL does silently. Fixes barakmich/psql #10 --- graph/sql/quadstore.go | 22 ++++++++++++++++------ internal/db/repl.go | 5 ++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/graph/sql/quadstore.go b/graph/sql/quadstore.go index d7f3a87..b8ace88 100644 --- a/graph/sql/quadstore.go +++ b/graph/sql/quadstore.go @@ -4,6 +4,7 @@ import ( "crypto/sha1" "database/sql" "encoding/hex" + "errors" "fmt" "hash" "sync" @@ -168,7 +169,7 @@ func (qs *QuadStore) copyFrom(tx *sql.Tx, in []graph.Delta) error { return stmt.Close() } -func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { +func (qs *QuadStore) runTxPostgres(tx *sql.Tx, in []graph.Delta, opts graph.IgnoreOpts) error { allAdds := true for _, d := range in { if d.Action != graph.Add { @@ -180,6 +181,7 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { } insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts, subject_hash, predicate_hash, object_hash, label_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`) + defer 
insert.Close() if err != nil { glog.Errorf("Cannot prepare insert statement: %v", err) return err @@ -204,10 +206,17 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { return err } case graph.Delete: - _, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`, + result, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`, d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label) if err != nil { - glog.Errorf("couldn't prepare DELETE statement: %v", err) + glog.Errorf("couldn't exec DELETE statement: %v", err) + } + affected, err := result.RowsAffected() + if err != nil { + glog.Errorf("couldn't get DELETE RowsAffected: %v", err) + } + if affected != 1 && !opts.IgnoreMissing { + return errors.New("deleting non-existent triple; rolling back") } default: panic("unknown action") @@ -216,8 +225,8 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error { return nil } -func (qs *QuadStore) ApplyDeltas(in []graph.Delta, _ graph.IgnoreOpts) error { - // TODO(barakmich): Support ignoreOpts? "ON CONFLICT IGNORE" +func (qs *QuadStore) ApplyDeltas(in []graph.Delta, opts graph.IgnoreOpts) error { + // TODO(barakmich): Support more ignoreOpts? 
"ON CONFLICT IGNORE" tx, err := qs.db.Begin() if err != nil { glog.Errorf("couldn't begin write transaction: %v", err) @@ -225,8 +234,9 @@ func (qs *QuadStore) ApplyDeltas(in []graph.Delta, _ graph.IgnoreOpts) error { } switch qs.sqlFlavor { case "postgres": - err = qs.buildTxPostgres(tx, in) + err = qs.runTxPostgres(tx, in, opts) if err != nil { + tx.Rollback() return err } default: diff --git a/internal/db/repl.go b/internal/db/repl.go index 20ec017..134a8ff 100644 --- a/internal/db/repl.go +++ b/internal/db/repl.go @@ -161,7 +161,10 @@ func Repl(h *graph.Handle, queryLanguage string, cfg *config.Config) error { fmt.Printf("Error: not a valid quad: %v\n", err) continue } - h.QuadWriter.RemoveQuad(quad) + err = h.QuadWriter.RemoveQuad(quad) + if err != nil { + fmt.Printf("error deleting: %v\n", err) + } continue case "exit":