port hasher pool to leveldb

This commit is contained in:
Barak Michener 2014-08-14 20:18:37 -04:00
parent 737037a894
commit de882b9f93
2 changed files with 44 additions and 41 deletions

View file

@@ -43,7 +43,7 @@ type Iterator struct {
func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleStore) *Iterator { func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleStore) *Iterator {
vb := value.(Token) vb := value.(Token)
p := make([]byte, 0, 2+qs.hasherSize) p := make([]byte, 0, 2+hashSize)
p = append(p, []byte(prefix)...) p = append(p, []byte(prefix)...)
p = append(p, []byte(vb[1:])...) p = append(p, []byte(vb[1:])...)
@@ -169,9 +169,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Subject: case quad.Subject:
return 2 return 2
case quad.Predicate: case quad.Predicate:
return qs.hasherSize + 2 return hashSize + 2
case quad.Object: case quad.Object:
return 2*qs.hasherSize + 2 return 2*hashSize + 2
case quad.Label: case quad.Label:
return -1 return -1
} }
@@ -179,11 +179,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
if bytes.Equal(prefix, []byte("po")) { if bytes.Equal(prefix, []byte("po")) {
switch d { switch d {
case quad.Subject: case quad.Subject:
return 2*qs.hasherSize + 2 return 2*hashSize + 2
case quad.Predicate: case quad.Predicate:
return 2 return 2
case quad.Object: case quad.Object:
return qs.hasherSize + 2 return hashSize + 2
case quad.Label: case quad.Label:
return -1 return -1
} }
@@ -191,9 +191,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
if bytes.Equal(prefix, []byte("os")) { if bytes.Equal(prefix, []byte("os")) {
switch d { switch d {
case quad.Subject: case quad.Subject:
return qs.hasherSize + 2 return hashSize + 2
case quad.Predicate: case quad.Predicate:
return 2*qs.hasherSize + 2 return 2*hashSize + 2
case quad.Object: case quad.Object:
return 2 return 2
case quad.Label: case quad.Label:
@@ -203,11 +203,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
if bytes.Equal(prefix, []byte("cp")) { if bytes.Equal(prefix, []byte("cp")) {
switch d { switch d {
case quad.Subject: case quad.Subject:
return 2*qs.hasherSize + 2 return 2*hashSize + 2
case quad.Predicate: case quad.Predicate:
return qs.hasherSize + 2 return hashSize + 2
case quad.Object: case quad.Object:
return 3*qs.hasherSize + 2 return 3*hashSize + 2
case quad.Label: case quad.Label:
return 2 return 2
} }

View file

@@ -21,6 +21,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"hash" "hash"
"sync"
"github.com/barakmich/glog" "github.com/barakmich/glog"
"github.com/syndtr/goleveldb/leveldb" "github.com/syndtr/goleveldb/leveldb"
@@ -42,6 +43,13 @@ const (
DefaultWriteBufferSize = 20 DefaultWriteBufferSize = 20
) )
var (
hashPool = sync.Pool{
New: func() interface{} { return sha1.New() },
}
hashSize = sha1.Size
)
type Token []byte type Token []byte
func (t Token) Key() interface{} { func (t Token) Key() interface{} {
@@ -49,15 +57,13 @@ func (t Token) Key() interface{} {
} }
type TripleStore struct { type TripleStore struct {
dbOpts *opt.Options dbOpts *opt.Options
db *leveldb.DB db *leveldb.DB
path string path string
open bool open bool
size int64 size int64
hasherSize int writeopts *opt.WriteOptions
makeHasher func() hash.Hash readopts *opt.ReadOptions
writeopts *opt.WriteOptions
readopts *opt.ReadOptions
} }
func createNewLevelDB(path string, _ graph.Options) error { func createNewLevelDB(path string, _ graph.Options) error {
@@ -94,8 +100,6 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro
write_buffer_mb = val write_buffer_mb = val
} }
qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB
qs.hasherSize = sha1.Size
qs.makeHasher = sha1.New
qs.writeopts = &opt.WriteOptions{ qs.writeopts = &opt.WriteOptions{
Sync: false, Sync: false,
} }
@@ -125,33 +129,30 @@ func (qs *TripleStore) Size() int64 {
} }
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
hasher := qs.makeHasher() key := make([]byte, 0, 2+(hashSize*3))
key := make([]byte, 0, 2+(qs.hasherSize*3))
// TODO(kortschak) Remove dependence on String() method. // TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key return key
} }
func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
hasher := qs.makeHasher() key := make([]byte, 0, 2+(hashSize*4))
key := make([]byte, 0, 2+(qs.hasherSize*4))
// TODO(kortschak) Remove dependence on String() method. // TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...) key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key return key
} }
func (qs *TripleStore) createValueKeyFor(s string) []byte { func (qs *TripleStore) createValueKeyFor(s string) []byte {
hasher := qs.makeHasher() key := make([]byte, 0, 1+hashSize)
key := make([]byte, 0, 1+qs.hasherSize)
key = append(key, []byte("z")...) key = append(key, []byte("z")...)
key = append(key, qs.convertStringToByteHash(s, hasher)...) key = append(key, qs.convertStringToByteHash(s)...)
return key return key
} }
@@ -336,11 +337,13 @@ func (qs *TripleStore) Quad(k graph.Value) quad.Quad {
return triple return triple
} }
func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) []byte { func (qs *TripleStore) convertStringToByteHash(s string) []byte {
hasher.Reset() h := hashPool.Get().(hash.Hash)
key := make([]byte, 0, qs.hasherSize) h.Reset()
hasher.Write([]byte(s)) defer hashPool.Put(h)
key = hasher.Sum(key) key := make([]byte, 0, hashSize)
h.Write([]byte(s))
key = h.Sum(key)
return key return key
} }
@@ -446,7 +449,7 @@ func (qs *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.
v := val.(Token) v := val.(Token)
offset := PositionOf(v[0:2], d, qs) offset := PositionOf(v[0:2], d, qs)
if offset != -1 { if offset != -1 {
return Token(append([]byte("z"), v[offset:offset+qs.hasherSize]...)) return Token(append([]byte("z"), v[offset:offset+hashSize]...))
} else { } else {
return Token(qs.Quad(val).Get(d)) return Token(qs.Quad(val).Get(d))
} }