Add hash-based indexes
This commit is contained in:
parent
c98318aa97
commit
ab3f59d21f
5 changed files with 95 additions and 44 deletions
|
|
@ -1,8 +1,12 @@
|
||||||
package sql
|
package sql
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/sha1"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"encoding/hex"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/lib/pq"
|
"github.com/lib/pq"
|
||||||
|
|
||||||
|
|
@ -18,6 +22,13 @@ func init() {
|
||||||
graph.RegisterQuadStore(QuadStoreType, true, newQuadStore, createSQLTables, nil)
|
graph.RegisterQuadStore(QuadStoreType, true, newQuadStore, createSQLTables, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
hashPool = sync.Pool{
|
||||||
|
New: func() interface{} { return sha1.New() },
|
||||||
|
}
|
||||||
|
hashSize = sha1.Size
|
||||||
|
)
|
||||||
|
|
||||||
type QuadStore struct {
|
type QuadStore struct {
|
||||||
db *sql.DB
|
db *sql.DB
|
||||||
sqlFlavor string
|
sqlFlavor string
|
||||||
|
|
@ -55,7 +66,11 @@ func createSQLTables(addr string, options graph.Options) error {
|
||||||
horizon BIGSERIAL PRIMARY KEY,
|
horizon BIGSERIAL PRIMARY KEY,
|
||||||
id BIGINT,
|
id BIGINT,
|
||||||
ts timestamp,
|
ts timestamp,
|
||||||
UNIQUE(subject, predicate, object, label)
|
subject_hash TEXT NOT NULL,
|
||||||
|
predicate_hash TEXT NOT NULL,
|
||||||
|
object_hash TEXT NOT NULL,
|
||||||
|
label_hash TEXT,
|
||||||
|
UNIQUE(subject_hash, predicate_hash, object_hash, label_hash)
|
||||||
);`)
|
);`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Cannot create quad table: %v", quadTable)
|
glog.Errorf("Cannot create quad table: %v", quadTable)
|
||||||
|
|
@ -73,17 +88,11 @@ func createSQLTables(addr string, options graph.Options) error {
|
||||||
CREATE INDEX pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32);
|
CREATE INDEX pos_index ON quads USING brin(predicate) WITH (pages_per_range = 32);
|
||||||
CREATE INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32);
|
CREATE INDEX osp_index ON quads USING brin(object) WITH (pages_per_range = 32);
|
||||||
`)
|
`)
|
||||||
} else if idxStrat == "prefix" {
|
|
||||||
index, err = tx.Exec(fmt.Sprintf(`
|
|
||||||
CREATE INDEX spo_index ON quads (substr(subject, 0, 8)) WITH (FILLFACTOR = %d);
|
|
||||||
CREATE INDEX pos_index ON quads (substr(predicate, 0, 8)) WITH (FILLFACTOR = %d);
|
|
||||||
CREATE INDEX osp_index ON quads (substr(object, 0, 8)) WITH (FILLFACTOR = %d);
|
|
||||||
`, factor, factor, factor))
|
|
||||||
} else {
|
} else {
|
||||||
index, err = tx.Exec(fmt.Sprintf(`
|
index, err = tx.Exec(fmt.Sprintf(`
|
||||||
CREATE INDEX spo_index ON quads (subject, predicate, object) WITH (FILLFACTOR = %d);
|
CREATE INDEX spo_index ON quads (subject_hash) WITH (FILLFACTOR = %d);
|
||||||
CREATE INDEX pos_index ON quads (predicate, object, subject) WITH (FILLFACTOR = %d);
|
CREATE INDEX pos_index ON quads (predicate_hash) WITH (FILLFACTOR = %d);
|
||||||
CREATE INDEX osp_index ON quads (object, subject, predicate) WITH (FILLFACTOR = %d);
|
CREATE INDEX osp_index ON quads (object_hash) WITH (FILLFACTOR = %d);
|
||||||
`, factor, factor, factor))
|
`, factor, factor, factor))
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -107,13 +116,34 @@ func newQuadStore(addr string, options graph.Options) (graph.QuadStore, error) {
|
||||||
return &qs, nil
|
return &qs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func hashOf(s string) string {
|
||||||
|
h := hashPool.Get().(hash.Hash)
|
||||||
|
h.Reset()
|
||||||
|
defer hashPool.Put(h)
|
||||||
|
key := make([]byte, 0, hashSize)
|
||||||
|
h.Write([]byte(s))
|
||||||
|
key = h.Sum(key)
|
||||||
|
return hex.EncodeToString(key)
|
||||||
|
}
|
||||||
|
|
||||||
func (qs *QuadStore) copyFrom(tx *sql.Tx, in []graph.Delta) error {
|
func (qs *QuadStore) copyFrom(tx *sql.Tx, in []graph.Delta) error {
|
||||||
stmt, err := tx.Prepare(pq.CopyIn("quads", "subject", "predicate", "object", "label", "id", "ts"))
|
stmt, err := tx.Prepare(pq.CopyIn("quads", "subject", "predicate", "object", "label", "id", "ts", "subject_hash", "predicate_hash", "object_hash", "label_hash"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, d := range in {
|
for _, d := range in {
|
||||||
_, err := stmt.Exec(d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp)
|
_, err := stmt.Exec(
|
||||||
|
d.Quad.Subject,
|
||||||
|
d.Quad.Predicate,
|
||||||
|
d.Quad.Object,
|
||||||
|
d.Quad.Label,
|
||||||
|
d.ID.Int(),
|
||||||
|
d.Timestamp,
|
||||||
|
hashOf(d.Quad.Subject),
|
||||||
|
hashOf(d.Quad.Predicate),
|
||||||
|
hashOf(d.Quad.Object),
|
||||||
|
hashOf(d.Quad.Label),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("couldn't prepare COPY statement: %v", err)
|
glog.Errorf("couldn't prepare COPY statement: %v", err)
|
||||||
return err
|
return err
|
||||||
|
|
@ -137,7 +167,7 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error {
|
||||||
return qs.copyFrom(tx, in)
|
return qs.copyFrom(tx, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts) VALUES ($1, $2, $3, $4, $5, $6)`)
|
insert, err := tx.Prepare(`INSERT INTO quads(subject, predicate, object, label, id, ts, subject_hash, predicate_hash, object_hash, label_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Cannot prepare insert statement: %v", err)
|
glog.Errorf("Cannot prepare insert statement: %v", err)
|
||||||
return err
|
return err
|
||||||
|
|
@ -145,7 +175,18 @@ func (qs *QuadStore) buildTxPostgres(tx *sql.Tx, in []graph.Delta) error {
|
||||||
for _, d := range in {
|
for _, d := range in {
|
||||||
switch d.Action {
|
switch d.Action {
|
||||||
case graph.Add:
|
case graph.Add:
|
||||||
_, err := insert.Exec(d.Quad.Subject, d.Quad.Predicate, d.Quad.Object, d.Quad.Label, d.ID.Int(), d.Timestamp)
|
_, err := insert.Exec(
|
||||||
|
d.Quad.Subject,
|
||||||
|
d.Quad.Predicate,
|
||||||
|
d.Quad.Object,
|
||||||
|
d.Quad.Label,
|
||||||
|
d.ID.Int(),
|
||||||
|
d.Timestamp,
|
||||||
|
hashOf(d.Quad.Subject),
|
||||||
|
hashOf(d.Quad.Predicate),
|
||||||
|
hashOf(d.Quad.Object),
|
||||||
|
hashOf(d.Quad.Label),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("couldn't prepare INSERT statement: %v", err)
|
glog.Errorf("couldn't prepare INSERT statement: %v", err)
|
||||||
return err
|
return err
|
||||||
|
|
@ -271,7 +312,7 @@ func (qs *QuadStore) sizeForIterator(isAll bool, dir quad.Direction, val string)
|
||||||
var size int64
|
var size int64
|
||||||
glog.V(4).Infoln("sql: getting size for select %s, %s", dir.String(), val)
|
glog.V(4).Infoln("sql: getting size for select %s, %s", dir.String(), val)
|
||||||
err = qs.db.QueryRow(
|
err = qs.db.QueryRow(
|
||||||
fmt.Sprintf("SELECT count(*) FROM quads WHERE %s = $1;", dir.String()), val).Scan(&size)
|
fmt.Sprintf("SELECT count(*) FROM quads WHERE %s_hash = $1;", dir.String()), hashOf(val)).Scan(&size)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error getting size from SQL database: %v", err)
|
glog.Errorln("Error getting size from SQL database: %v", err)
|
||||||
return 0
|
return 0
|
||||||
|
|
|
||||||
|
|
@ -290,7 +290,7 @@ func (it *SQLIterator) makeCursor(next bool, value graph.Value) error {
|
||||||
}
|
}
|
||||||
var q string
|
var q string
|
||||||
var values []string
|
var values []string
|
||||||
q, values = it.sql.buildSQL(next, value)
|
q, values = it.sql.buildSQL(next, value, false)
|
||||||
q = convertToPostgres(q, values)
|
q = convertToPostgres(q, values)
|
||||||
ivalues := make([]interface{}, 0, len(values))
|
ivalues := make([]interface{}, 0, len(values))
|
||||||
for _, v := range values {
|
for _, v := range values {
|
||||||
|
|
|
||||||
|
|
@ -50,11 +50,11 @@ type tagDir struct {
|
||||||
func (t tagDir) String() string {
|
func (t tagDir) String() string {
|
||||||
if t.dir == quad.Any {
|
if t.dir == quad.Any {
|
||||||
if t.justLocal {
|
if t.justLocal {
|
||||||
return fmt.Sprintf("%s.__execd as %s", t.table, t.tag)
|
return fmt.Sprintf("%s.__execd as %s, %s.__execd_hash as %s_hash", t.table, t.tag, t.table, t.tag)
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%s.%s as %s", t.table, t.tag, t.tag)
|
return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.tag, t.tag, t.table, t.tag, t.tag)
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%s.%s as %s", t.table, t.dir, t.tag)
|
return fmt.Sprintf("%s.%s as %s, %s.%s_hash as %s_hash", t.table, t.dir, t.tag, t.table, t.dir, t.tag)
|
||||||
}
|
}
|
||||||
|
|
||||||
type tableDef struct {
|
type tableDef struct {
|
||||||
|
|
@ -71,7 +71,7 @@ type sqlItDir struct {
|
||||||
type sqlIterator interface {
|
type sqlIterator interface {
|
||||||
sqlClone() sqlIterator
|
sqlClone() sqlIterator
|
||||||
|
|
||||||
buildSQL(next bool, val graph.Value) (string, []string)
|
buildSQL(next bool, val graph.Value, hash bool) (string, []string)
|
||||||
getTables() []tableDef
|
getTables() []tableDef
|
||||||
getTags() []tagDir
|
getTags() []tagDir
|
||||||
buildWhere() (string, []string)
|
buildWhere() (string, []string)
|
||||||
|
|
@ -219,8 +219,8 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) {
|
||||||
var q []string
|
var q []string
|
||||||
var vals []string
|
var vals []string
|
||||||
for _, c := range l.constraints {
|
for _, c := range l.constraints {
|
||||||
q = append(q, fmt.Sprintf("%s.%s = ?", l.tableName, c.dir))
|
q = append(q, fmt.Sprintf("%s.%s_hash = ?", l.tableName, c.dir))
|
||||||
vals = append(vals, c.vals[0])
|
vals = append(vals, hashOf(c.vals[0]))
|
||||||
}
|
}
|
||||||
for _, i := range l.nodeIts {
|
for _, i := range l.nodeIts {
|
||||||
t := i.it.tableID()
|
t := i.it.tableID()
|
||||||
|
|
@ -228,7 +228,7 @@ func (l *SQLLinkIterator) buildWhere() (string, []string) {
|
||||||
if t.dir == quad.Any {
|
if t.dir == quad.Any {
|
||||||
dir = t.tag
|
dir = t.tag
|
||||||
}
|
}
|
||||||
q = append(q, fmt.Sprintf("%s.%s = %s.%s", l.tableName, i.dir, t.table, dir))
|
q = append(q, fmt.Sprintf("%s.%s_hash = %s.%s_hash", l.tableName, i.dir, t.table, dir))
|
||||||
}
|
}
|
||||||
for _, i := range l.nodeIts {
|
for _, i := range l.nodeIts {
|
||||||
s, v := i.it.buildWhere()
|
s, v := i.it.buildWhere()
|
||||||
|
|
@ -246,13 +246,17 @@ func (l *SQLLinkIterator) tableID() tagDir {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string) {
|
func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value, hash bool) (string, []string) {
|
||||||
query := "SELECT DISTINCT "
|
query := "SELECT DISTINCT "
|
||||||
|
hashs := ""
|
||||||
|
if hash {
|
||||||
|
hashs = "_hash"
|
||||||
|
}
|
||||||
t := []string{
|
t := []string{
|
||||||
fmt.Sprintf("%s.subject", l.tableName),
|
fmt.Sprintf("%s.subject%s", l.tableName, hashs),
|
||||||
fmt.Sprintf("%s.predicate", l.tableName),
|
fmt.Sprintf("%s.predicate%s", l.tableName, hashs),
|
||||||
fmt.Sprintf("%s.object", l.tableName),
|
fmt.Sprintf("%s.object%s", l.tableName, hashs),
|
||||||
fmt.Sprintf("%s.label", l.tableName),
|
fmt.Sprintf("%s.label%s", l.tableName, hashs),
|
||||||
}
|
}
|
||||||
for _, v := range l.getTags() {
|
for _, v := range l.getTags() {
|
||||||
t = append(t, v.String())
|
t = append(t, v.String())
|
||||||
|
|
@ -276,16 +280,16 @@ func (l *SQLLinkIterator) buildSQL(next bool, val graph.Value) (string, []string
|
||||||
constraint += " AND "
|
constraint += " AND "
|
||||||
}
|
}
|
||||||
t = []string{
|
t = []string{
|
||||||
fmt.Sprintf("%s.subject = ?", l.tableName),
|
fmt.Sprintf("%s.subject_hash = ?", l.tableName),
|
||||||
fmt.Sprintf("%s.predicate = ?", l.tableName),
|
fmt.Sprintf("%s.predicate_hash = ?", l.tableName),
|
||||||
fmt.Sprintf("%s.object = ?", l.tableName),
|
fmt.Sprintf("%s.object_hash = ?", l.tableName),
|
||||||
fmt.Sprintf("%s.label = ?", l.tableName),
|
fmt.Sprintf("%s.label_hash = ?", l.tableName),
|
||||||
}
|
}
|
||||||
constraint += strings.Join(t, " AND ")
|
constraint += strings.Join(t, " AND ")
|
||||||
values = append(values, v.Subject)
|
values = append(values, hashOf(v.Subject))
|
||||||
values = append(values, v.Predicate)
|
values = append(values, hashOf(v.Predicate))
|
||||||
values = append(values, v.Object)
|
values = append(values, hashOf(v.Object))
|
||||||
values = append(values, v.Label)
|
values = append(values, hashOf(v.Label))
|
||||||
}
|
}
|
||||||
query += constraint
|
query += constraint
|
||||||
query += ";"
|
query += ";"
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,9 @@ func (n *SQLNodeIntersection) Describe() string {
|
||||||
func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string {
|
func (n *SQLNodeIntersection) buildResult(result []string, cols []string) map[string]string {
|
||||||
m := make(map[string]string)
|
m := make(map[string]string)
|
||||||
for i, c := range cols {
|
for i, c := range cols {
|
||||||
|
if strings.HasSuffix(c, "_hash") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if c == "__execd" {
|
if c == "__execd" {
|
||||||
n.result = result[i]
|
n.result = result[i]
|
||||||
}
|
}
|
||||||
|
|
@ -100,7 +103,7 @@ func (n *SQLNodeIntersection) buildSubqueries() []tableDef {
|
||||||
for i, it := range n.nodeIts {
|
for i, it := range n.nodeIts {
|
||||||
var td tableDef
|
var td tableDef
|
||||||
var table string
|
var table string
|
||||||
table, td.values = it.buildSQL(true, nil)
|
table, td.values = it.buildSQL(true, nil, true)
|
||||||
td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1])
|
td.table = fmt.Sprintf("\n(%s)", table[:len(table)-1])
|
||||||
td.name = n.nodetables[i]
|
td.name = n.nodetables[i]
|
||||||
out = append(out, td)
|
out = append(out, td)
|
||||||
|
|
@ -150,13 +153,13 @@ func (n *SQLNodeIntersection) buildWhere() (string, []string) {
|
||||||
var q []string
|
var q []string
|
||||||
var vals []string
|
var vals []string
|
||||||
for _, tb := range n.nodetables[1:] {
|
for _, tb := range n.nodetables[1:] {
|
||||||
q = append(q, fmt.Sprintf("%s.__execd = %s.__execd", n.nodetables[0], tb))
|
q = append(q, fmt.Sprintf("%s.__execd_hash = %s.__execd_hash", n.nodetables[0], tb))
|
||||||
}
|
}
|
||||||
query := strings.Join(q, " AND ")
|
query := strings.Join(q, " AND ")
|
||||||
return query, vals
|
return query, vals
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []string) {
|
func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value, _ bool) (string, []string) {
|
||||||
topData := n.tableID()
|
topData := n.tableID()
|
||||||
tags := []tagDir{topData}
|
tags := []tagDir{topData}
|
||||||
tags = append(tags, n.getTags()...)
|
tags = append(tags, n.getTags()...)
|
||||||
|
|
@ -184,8 +187,8 @@ func (n *SQLNodeIntersection) buildSQL(next bool, val graph.Value) (string, []st
|
||||||
if constraint != "" {
|
if constraint != "" {
|
||||||
constraint += " AND "
|
constraint += " AND "
|
||||||
}
|
}
|
||||||
constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir)
|
constraint += fmt.Sprintf("%s.%s_hash = ?", topData.table, topData.dir)
|
||||||
values = append(values, v)
|
values = append(values, hashOf(v))
|
||||||
}
|
}
|
||||||
query += constraint
|
query += constraint
|
||||||
query += ";"
|
query += ";"
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,9 @@ func (n *SQLNodeIterator) Describe() string {
|
||||||
func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string {
|
func (n *SQLNodeIterator) buildResult(result []string, cols []string) map[string]string {
|
||||||
m := make(map[string]string)
|
m := make(map[string]string)
|
||||||
for i, c := range cols {
|
for i, c := range cols {
|
||||||
|
if strings.HasSuffix(c, "_hash") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if c == "__execd" {
|
if c == "__execd" {
|
||||||
n.result = result[i]
|
n.result = result[i]
|
||||||
}
|
}
|
||||||
|
|
@ -157,7 +160,7 @@ func (n *SQLNodeIterator) buildWhere() (string, []string) {
|
||||||
return query, vals
|
return query, vals
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string) {
|
func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value, _ bool) (string, []string) {
|
||||||
topData := n.tableID()
|
topData := n.tableID()
|
||||||
tags := []tagDir{topData}
|
tags := []tagDir{topData}
|
||||||
tags = append(tags, n.getTags()...)
|
tags = append(tags, n.getTags()...)
|
||||||
|
|
@ -185,8 +188,8 @@ func (n *SQLNodeIterator) buildSQL(next bool, val graph.Value) (string, []string
|
||||||
if constraint != "" {
|
if constraint != "" {
|
||||||
constraint += " AND "
|
constraint += " AND "
|
||||||
}
|
}
|
||||||
constraint += fmt.Sprintf("%s.%s = ?", topData.table, topData.dir)
|
constraint += fmt.Sprintf("%s.%s_hash = ?", topData.table, topData.dir)
|
||||||
values = append(values, v)
|
values = append(values, hashOf(v))
|
||||||
}
|
}
|
||||||
query += constraint
|
query += constraint
|
||||||
query += ";"
|
query += ";"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue