From 8830760df242cbd7d30060b338b8cb5e738a2e15 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Thu, 30 Jul 2015 18:40:48 -0400 Subject: [PATCH] Split intersection and node iteration into two logical SQL iterators --- graph/sql/optimizers.go | 33 ++++-- graph/sql/sql_node_intersection.go | 209 +++++++++++++++++++++++++++++++++++++ graph/sql/sql_node_iterator.go | 116 +++++--------------- 3 files changed, 259 insertions(+), 99 deletions(-) create mode 100644 graph/sql/sql_node_intersection.go diff --git a/graph/sql/optimizers.go b/graph/sql/optimizers.go index debf7a8..68e1e3b 100644 --- a/graph/sql/optimizers.go +++ b/graph/sql/optimizers.go @@ -28,6 +28,10 @@ func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error if bnew, ok := b.(*SQLNodeIterator); ok { return intersectNode(anew, bnew, qs) } + } else if anew, ok := a.(*SQLNodeIntersection); ok { + if bnew, ok := b.(*SQLNodeIterator); ok { + return appendNodeIntersection(anew, bnew, qs) + } } else if anew, ok := a.(*SQLLinkIterator); ok { if bnew, ok := b.(*SQLLinkIterator); ok { return intersectLink(anew, bnew, qs) @@ -40,9 +44,20 @@ func intersect(a sqlIterator, b sqlIterator, qs *QuadStore) (*SQLIterator, error } func intersectNode(a *SQLNodeIterator, b *SQLNodeIterator, qs *QuadStore) (*SQLIterator, error) { - m := &SQLNodeIterator{ + m := &SQLNodeIntersection{ tableName: newTableName(), - linkIts: append(a.linkIts, b.linkIts...), + nodeIts: []sqlIterator{a, b}, + } + m.Tagger().CopyFromTagger(a.Tagger()) + m.Tagger().CopyFromTagger(b.Tagger()) + it := NewSQLIterator(qs, m) + return it, nil +} + +func appendNodeIntersection(a *SQLNodeIntersection, b *SQLNodeIterator, qs *QuadStore) (*SQLIterator, error) { + m := &SQLNodeIntersection{ + tableName: newTableName(), + nodeIts: append(a.nodeIts, b), } m.Tagger().CopyFromTagger(a.Tagger()) m.Tagger().CopyFromTagger(b.Tagger()) @@ -71,11 +86,9 @@ func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, err out := &SQLNodeIterator{ tableName: newTableName(), - linkIts: []sqlItDir{ - sqlItDir{ - it: a, - dir: d, - }, + linkIt: sqlItDir{ + it: a, + dir: d, }, } it := NewSQLIterator(qs, out) @@ -83,9 +96,13 @@ func hasa(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, err } func linksto(aIn sqlIterator, d quad.Direction, qs *QuadStore) (graph.Iterator, error) { + var a sqlIterator a, ok := aIn.(*SQLNodeIterator) if !ok { - return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + a, ok = aIn.(*SQLNodeIntersection) + if !ok { + return nil, errors.New("Can't take the LINKSTO of a node SQL iterator") + } } out := &SQLLinkIterator{ diff --git a/graph/sql/sql_node_intersection.go b/graph/sql/sql_node_intersection.go new file mode 100644 index 0000000..d010d66 --- /dev/null +++ b/graph/sql/sql_node_intersection.go @@ -0,0 +1,209 @@ +// Copyright 2015 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +type SQLNodeIntersection struct { + tableName string + + nodeIts []sqlIterator + nodetables []string + size int64 + tagger graph.Tagger + + result string +} + +func (n *SQLNodeIntersection) sqlClone() sqlIterator { + m := &SQLNodeIntersection{ + tableName: n.tableName, + size: n.size, + } + for _, i := range n.nodeIts { + m.nodeIts = append(m.nodeIts, i.sqlClone()) + } + m.tagger.CopyFromTagger(n.Tagger()) + return m +} + +func (n *SQLNodeIntersection) Tagger() *graph.Tagger { + return &n.tagger +} + +func (n *SQLNodeIntersection) Result() graph.Value { + return n.result +} + +func (n *SQLNodeIntersection) Type() sqlQueryType { + return nodeIntersect +} + +func (n *SQLNodeIntersection) Size(qs *QuadStore) (int64, bool) { + return qs.Size() / int64(len(n.nodeIts)+1), true +} + +func (n *SQLNodeIntersection) Describe() string { + return fmt.Sprintf("SQL_NODE_INTERSECTION: %#v", n) +} + +func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string { + m := make(map[string]string) + for i, c := range cols { + if c == "__execd" { + n.result = result[i] + } + m[c] = result[i] + } + return m +} + +func (n *SQLNodeIntersection) makeNodeTableNames() { + if n.nodetables != nil { + return + } + n.nodetables = make([]string, len(n.nodeIts)) + for i, _ := range n.nodetables { + n.nodetables[i] = newNodeTableName() + } +} + +func (n *SQLNodeIntersection) getTables() []tableDef { + if len(n.nodeIts) == 0 { + panic("Combined no subnode queries") + } + return n.buildSubqueries() +} + +func (n *SQLNodeIntersection) buildSubqueries() []tableDef { + var out []tableDef + n.makeNodeTableNames() + for i, it := range n.nodeIts { + var td tableDef + var table string + table, td.values = it.buildSQL(true, nil) + td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) + td.name = n.nodetables[i] + out = append(out, td) + } + return out +} + +func (n *SQLNodeIntersection) tableID() tagDir { + n.makeNodeTableNames() + return tagDir{ + table: n.nodetables[0], + dir: quad.Any, + tag: "__execd", + } +} + +func (n *SQLNodeIntersection) getLocalTags() []tagDir { + myTag := n.tableID() + var out []tagDir + for _, tag := range n.tagger.Tags() { + out = append(out, tagDir{ + dir: myTag.dir, + table: myTag.table, + tag: tag, + justLocal: true, + }) + } + return out +} + +func (n *SQLNodeIntersection) getTags() []tagDir { + out := n.getLocalTags() + n.makeNodeTableNames() + for i, it := range n.nodeIts { + for _, v := range it.getTags() { + out = append(out, tagDir{ + tag: v.tag, + dir: quad.Any, + table: n.nodetables[i], + }) + } + } + return out +} + +func (n *SQLNodeIntersection) buildWhere() (string, []string) { + var q []string + var vals []string + for _, tb := range n.nodetables[1:] { + q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) + } + query := strings.Join(q, " AND ") + return query, vals +} + +func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) { + topData := n.tableID() + tags := []tagDir{topData} + tags = append(tags, n.getTags()...) + query := "SELECT DISTINCT " + var t []string + for _, v := range tags { + t = append(t, v.String()) + } + query += strings.Join(t, ", ") + query += " FROM " + t = []string{} + var values []string + for _, k := range n.getTables() { + values = append(values, k.values...) + t = append(t, fmt.Sprintf("%s as %s", k.table, k.name)) + } + query += strings.Join(t, ", ") + query += " WHERE " + + constraint, wherevalues := n.buildWhere() + values = append(values, wherevalues...) + + if !next { + v := val.(string) + if constraint != "" { + constraint += " AND " + } + constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir) + values = append(values, v) + } + query += constraint + query += ";" + + glog.V(2).Infoln(query) + + if glog.V(4) { + dstr := query + for i := 1; i <= len(values); i++ { + dstr = strings.Replace(dstr, "?", fmt.Sprintf("'%s'", values[i-1]), 1) + } + glog.V(4).Infoln(dstr) + } + return query, values +} + +func (n *SQLNodeIntersection) sameTopResult(target []string, test []string) bool { + return target[0] == test[0] +} + +func (n *SQLNodeIntersection) quickContains(_ graph.Value) (bool, bool) { return false, false } diff --git a/graph/sql/sql_node_iterator.go b/graph/sql/sql_node_iterator.go index 90e80e8..3d9ec21 100644 --- a/graph/sql/sql_node_iterator.go +++ b/graph/sql/sql_node_iterator.go @@ -31,6 +31,7 @@ type sqlQueryType int const ( node sqlQueryType = iota link + nodeIntersect ) func init() { @@ -45,10 +46,9 @@ func newNodeTableName() string { type SQLNodeIterator struct { tableName string - linkIts []sqlItDir - nodetables []string - size int64 - tagger graph.Tagger + linkIt sqlItDir + size int64 + tagger graph.Tagger result string } @@ -57,12 +57,10 @@ func (n *SQLNodeIterator) sqlClone() sqlIterator { m := &SQLNodeIterator{ tableName: n.tableName, size: n.size, - } - for _, i := range n.linkIts { - m.linkIts = append(m.linkIts, sqlItDir{ - dir: i.dir, - it: i.it.sqlClone(), - }) + linkIt: sqlItDir{ + dir: n.linkIt.dir, + it: n.linkIt.it.sqlClone(), + }, } m.tagger.CopyFromTagger(n.Tagger()) return m @@ -81,7 +79,7 @@ func (n *SQLNodeIterator) Type() sqlQueryType { } func (n *SQLNodeIterator) Size(qs *QuadStore) (int64, bool) { - return qs.Size() / int64(len(n.linkIts)+1), true + return qs.Size() / 2, true } func (n *SQLNodeIterator) Describe() string { @@ -99,25 +97,10 @@ func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string return m } -func (n *SQLNodeIterator) makeNodeTableNames() { - if n.nodetables != nil { - return - } - n.nodetables = make([]string, len(n.linkIts)) - for i, _ := range n.nodetables { - n.nodetables[i] = newNodeTableName() - } -} - func (n *SQLNodeIterator) getTables() []tableDef { var out []tableDef - switch len(n.linkIts) { - case 0: - return []tableDef{tableDef{table: "quads", name: n.tableName}} - case 1: - out = n.linkIts[0].it.getTables() - default: - return n.buildSubqueries() + if n.linkIt.it != nil { + out = n.linkIt.it.getTables() } if len(out) == 0 { out = append(out, tableDef{table: "quads", name: n.tableName}) @@ -125,49 +108,19 @@ func (n *SQLNodeIterator) getTables() []tableDef { return out } -func (n *SQLNodeIterator) buildSubqueries() []tableDef { - var out []tableDef - n.makeNodeTableNames() - for i, it := range n.linkIts { - var td tableDef - // TODO(barakmich): This is a dirty hack. The real implementation is to - // separate SQL iterators to build a similar tree as we're doing here, and - // have a single graph.Iterator 'caddy' structure around it. - subNode := &SQLNodeIterator{ - tableName: newTableName(), - linkIts: []sqlItDir{it}, - } - var table string - table, td.values = subNode.buildSQL(true, nil) - td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) - td.name = n.nodetables[i] - out = append(out, td) - } - return out -} - func (n *SQLNodeIterator) tableID() tagDir { - switch len(n.linkIts) { - case 0: + if n.linkIt.it != nil { return tagDir{ - table: n.tableName, - dir: quad.Any, - tag: "__execd", - } - case 1: - return tagDir{ - table: n.linkIts[0].it.tableID().table, - dir: n.linkIts[0].dir, - tag: "__execd", - } - default: - n.makeNodeTableNames() - return tagDir{ - table: n.nodetables[0], - dir: quad.Any, + table: n.linkIt.it.tableID().table, + dir: n.linkIt.dir, tag: "__execd", } } + return tagDir{ + table: n.tableName, + dir: quad.Any, + tag: "__execd", + } } func (n *SQLNodeIterator) getLocalTags() []tagDir { @@ -186,21 +139,8 @@ func (n *SQLNodeIterator) getLocalTags() []tagDir { func (n *SQLNodeIterator) getTags() []tagDir { out := n.getLocalTags() - if len(n.linkIts) > 1 { - n.makeNodeTableNames() - for i, it := range n.linkIts { - for _, v := range it.it.getTags() { - out = append(out, tagDir{ - tag: v.tag, - dir: quad.Any, - table: n.nodetables[i], - }) - } - } - return out - } - for _, i := range n.linkIts { - out = append(out, i.it.getTags()...) + if n.linkIt.it != nil { + out = append(out, n.linkIt.it.getTags()...) } return out } @@ -208,16 +148,10 @@ func (n *SQLNodeIterator) getTags() []tagDir { func (n *SQLNodeIterator) buildWhere() (string, []string) { var q []string var vals []string - if len(n.linkIts) > 1 { - for _, tb := range n.nodetables[1:] { - q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb)) - } - } else { - for _, i := range n.linkIts { - s, v := i.it.buildWhere() - q = append(q, s) - vals = append(vals, v...) - } + if n.linkIt.it != nil { + s, v := n.linkIt.it.buildWhere() + q = append(q, s) + vals = append(vals, v...) } query := strings.Join(q, " AND ") return query, vals