Can't use DISTINCT; fix double-hashing bug

Remove DISTINCT from the generated SQL queries, and make Describe() print the SQL query

Clean up the quadstore a bit, and expose noSizes as an external option (set via the "local_optimize" key)
This commit is contained in:
Barak Michener 2015-08-12 14:40:02 -04:00
parent fab8cd64b3
commit ebaf8b2a98
5 changed files with 31 additions and 55 deletions

View file

@ -77,25 +77,17 @@ func createSQLTables(addr string, options graph.Options) error {
glog.Errorf("Cannot create quad table: %v", quadTable) glog.Errorf("Cannot create quad table: %v", quadTable)
return err return err
} }
idxStrat, _, err := options.StringKey("db_index_strategy")
factor, factorOk, err := options.IntKey("db_fill_factor") factor, factorOk, err := options.IntKey("db_fill_factor")
if !factorOk { if !factorOk {
factor = 50 factor = 50
} }
var index sql.Result var index sql.Result
if idxStrat == "brin" {
index, err = tx.Exec(` index, err = tx.Exec(fmt.Sprintf(`
CREATE INDEX spo_index ON quads USING brin(subject) WITH (pages_per_range = 32);
CREATE INDEX pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32);
CREATE INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32);
`)
} else {
index, err = tx.Exec(fmt.Sprintf(`
CREATE INDEX spo_index ON quads (subject_hash) WITH (FILLFACTOR = %d); CREATE INDEX spo_index ON quads (subject_hash) WITH (FILLFACTOR = %d);
CREATE INDEX pos_index ON quads (predicate_hash) WITH (FILLFACTOR = %d); CREATE INDEX pos_index ON quads (predicate_hash) WITH (FILLFACTOR = %d);
CREATE INDEX osp_index ON quads (object_hash) WITH (FILLFACTOR = %d); CREATE INDEX osp_index ON quads (object_hash) WITH (FILLFACTOR = %d);
`, factor, factor, factor)) `, factor, factor, factor))
}
if err != nil { if err != nil {
glog.Errorf("Cannot create indices: %v", index) glog.Errorf("Cannot create indices: %v", index)
return err return err
@ -110,10 +102,22 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
localOpt, localOptOk, err := options.BoolKey("local_optimize")
if err != nil {
return nil, err
}
qs.db = conn qs.db = conn
qs.sqlFlavor = "postgres" qs.sqlFlavor = "postgres"
qs.size = -1 qs.size = -1
qs.lru = newCache(1024) qs.lru = newCache(1024)
// Skip size checking by default.
qs.noSizes = true
if localOptOk {
if localOpt {
qs.noSizes = false
}
}
return &qs, nil return &qs, nil
} }
@ -192,25 +196,12 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error {
glog.Errorf("couldn't prepare INSERT statement: %v", err) glog.Errorf("couldn't prepare INSERT statement: %v", err)
return err return err
} }
//for _, dir := range quad.Directions {
//_, err := tx.Exec(`
//WITH upsert AS (UPDATE nodes SET size=size+1 WHERE node=$1 RETURNING *)
//INSERT INTO nodes (node, size) SELECT $1, 1 WHERE NOT EXISTS (SELECT * FROM UPSERT);
//`, d.Quad.Get(dir))
//if err != nil {
//glog.Errorf("couldn't prepare upsert statement in direction %s: %v", dir, err)
//return err
//}
//}
case graph.Delete: case graph.Delete:
_, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`, _, err := tx.Exec(`DELETE FROM quads WHERE subject=$1 and predicate=$2 and object=$3 and label=$4;`,
d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label) d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label)
if err != nil { if err != nil {
glog.Errorf("couldn't prepare DELETE statement: %v", err) glog.Errorf("couldn't prepare DELETE statement: %v", err)
} }
//for _, dir := range quad.Directions {
//tx.Exec(`UPDATE nodes SET size=size-1 WHERE node=$1;`, d.Quad.Get(dir))
//}
default: default:
panic("unknown action") panic("unknown action")
} }

View file

@ -290,7 +290,7 @@ func (it *SQLIterator) makeCursor(next bool, value graph.Value) error {
} }
var q string var q string
var values []string var values []string
q, values = it.sql.buildSQL(next, value, true) q, values = it.sql.buildSQL(next, value)
q = convertToPostgres(q, values) q = convertToPostgres(q, values)
ivalues := make([]interface{}, 0, len(values)) ivalues := make([]interface{}, 0, len(values))
for _, v := range values { for _, v := range values {

View file

@ -71,7 +71,7 @@ type sqlItDir struct {
type sqlIterator interface { type sqlIterator interface {
sqlClone() sqlIterator sqlClone() sqlIterator
buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) buildSQL(next bool, val graph.Value) (string, []string)
getTables() []tableDef getTables() []tableDef
getTags() []tagDir getTags() []tagDir
buildWhere() (string, []string) buildWhere() (string, []string)
@ -143,7 +143,8 @@ func (l *SQLLinkIterator) Size(qs *QuadStore) (int64, bool) {
} }
func (l *SQLLinkIterator) Describe() string { func (l *SQLLinkIterator) Describe() string {
return fmt.Sprintf("SQL_LINK_QUERY: %#v", l) s, _ := l.buildSQL(true, nil)
return fmt.Sprintf("SQL_LINK_QUERY: %s", s)
} }
func (l *SQLLinkIterator) Type() sqlQueryType { func (l *SQLLinkIterator) Type() sqlQueryType {
@ -246,20 +247,13 @@ func (l *SQLLinkIterator) tableID() tagDir {
} }
} }
func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) {
query := "SELECT " query := "SELECT "
if topLevel {
query += "DISTINCT "
}
hashs := ""
if !topLevel {
hashs = "_hash"
}
t := []string{ t := []string{
fmt.Sprintf("%s.subject%s", l.tableName, hashs), fmt.Sprintf("%s.subject", l.tableName),
fmt.Sprintf("%s.predicate%s", l.tableName, hashs), fmt.Sprintf("%s.predicate", l.tableName),
fmt.Sprintf("%s.object%s", l.tableName, hashs), fmt.Sprintf("%s.object", l.tableName),
fmt.Sprintf("%s.label%s", l.tableName, hashs), fmt.Sprintf("%s.label", l.tableName),
} }
for _, v := range l.getTags() { for _, v := range l.getTags() {
t = append(t, v.String()) t = append(t, v.String())
@ -301,8 +295,6 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, topLevel bool) (s
query += constraint query += constraint
query += ";" query += ";"
glog.V(2).Infoln(query)
if glog.V(4) { if glog.V(4) {
dstr := query dstr := query
for i := 1; i <= len(values); i++ { for i := 1; i <= len(values); i++ {

View file

@ -63,7 +63,8 @@ func (n *SQLNodeIntersection) Size(qs *QuadStore) (int64, bool) {
} }
func (n *SQLNodeIntersection) Describe() string { func (n *SQLNodeIntersection) Describe() string {
return fmt.Sprintf("SQL_NODE_INTERSECTION: %#v", n) s, _ := n.buildSQL(true, nil)
return fmt.Sprintf("SQL_NODE_INTERSECTION: %s", s)
} }
func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string { func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string {
@ -103,7 +104,7 @@ func (n *SQLNodeIntersection) buildSubqueries() []tableDef {
for i, it := range n.nodeIts { for i, it := range n.nodeIts {
var td tableDef var td tableDef
var table string var table string
table, td.values = it.buildSQL(true, nil, false) table, td.values = it.buildSQL(true, nil)
td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1]) td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1])
td.name = n.nodetables[i] td.name = n.nodetables[i]
out = append(out, td) out = append(out, td)
@ -159,14 +160,11 @@ func (n *SQLNodeIntersection) buildWhere() (string, []string) {
return query, vals return query, vals
} }
func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) {
topData := n.tableID() topData := n.tableID()
tags := []tagDir{topData} tags := []tagDir{topData}
tags = append(tags, n.getTags()...) tags = append(tags, n.getTags()...)
query := "SELECT " query := "SELECT "
if topLevel {
query += "DISTINCT "
}
var t []string var t []string
for _, v := range tags { for _, v := range tags {
t = append(t, v.String()) t = append(t, v.String())
@ -196,8 +194,6 @@ func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, topLevel bool
query += constraint query += constraint
query += ";" query += ";"
glog.V(2).Infoln(query)
if glog.V(4) { if glog.V(4) {
dstr := query dstr := query
for i := 1; i <= len(values); i++ { for i := 1; i <= len(values); i++ {

View file

@ -83,7 +83,8 @@ func (n *SQLNodeIterator) Size(qs *QuadStore) (int64, bool) {
} }
func (n *SQLNodeIterator) Describe() string { func (n *SQLNodeIterator) Describe() string {
return fmt.Sprintf("SQL_NODE_QUERY: %#v", n) s, _ := n.buildSQL(true, nil)
return fmt.Sprintf("SQL_NODE_QUERY: %s", s)
} }
func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string { func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string {
@ -160,14 +161,12 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) {
return query, vals return query, vals
} }
func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, topLevel bool) (string, []string) { func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) {
topData := n.tableID() topData := n.tableID()
tags := []tagDir{topData} tags := []tagDir{topData}
tags = append(tags, n.getTags()...) tags = append(tags, n.getTags()...)
query := "SELECT " query := "SELECT "
if topLevel {
query += "DISTINCT "
}
var t []string var t []string
for _, v := range tags { for _, v := range tags {
t = append(t, v.String()) t = append(t, v.String())
@ -197,8 +196,6 @@ func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, topLevel bool) (s
query += constraint query += constraint
query += ";" query += ";"
glog.V(2).Infoln(query)
if glog.V(4) { if glog.V(4) {
dstr := query dstr := query
for i := 1; i <= len(values); i++ { for i := 1; i <= len(values); i++ {