From 737037a894b3a452042d69aed3a5bff42170376b Mon Sep 17 00:00:00 2001
From: kortschak
Date: Thu, 14 Aug 2014 19:03:55 +0930
Subject: [PATCH 1/2] Experiment with sync.Pool

---
 graph/mongo/iterator.go    |  8 ++++----
 graph/mongo/triplestore.go | 51 +++++++++++++++++++++++++---------------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go
index 03a8560..9d61711 100644
--- a/graph/mongo/iterator.go
+++ b/graph/mongo/iterator.go
@@ -185,13 +185,13 @@ func (it *Iterator) Contains(v graph.Value) bool {
 	case quad.Subject:
 		offset = 0
 	case quad.Predicate:
-		offset = (it.qs.hasherSize * 2)
+		offset = (hashSize * 2)
 	case quad.Object:
-		offset = (it.qs.hasherSize * 2) * 2
+		offset = (hashSize * 2) * 2
 	case quad.Label:
-		offset = (it.qs.hasherSize * 2) * 3
+		offset = (hashSize * 2) * 3
 	}
-	val := v.(string)[offset : it.qs.hasherSize*2+offset]
+	val := v.(string)[offset : hashSize*2+offset]
 	if val == it.hash {
 		it.result = v
 		return graph.ContainsLogOut(it, v, true)
diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go
index 6c2a334..044e174 100644
--- a/graph/mongo/triplestore.go
+++ b/graph/mongo/triplestore.go
@@ -19,6 +19,7 @@ import (
 	"encoding/hex"
 	"hash"
 	"io"
+	"sync"
 
 	"gopkg.in/mgo.v2"
 	"gopkg.in/mgo.v2/bson"
@@ -38,12 +39,17 @@ var _ graph.BulkLoader = (*TripleStore)(nil)
 
 const DefaultDBName = "cayley"
 
+var (
+	hashPool = sync.Pool{
+		New: func() interface{} { return sha1.New() },
+	}
+	hashSize = sha1.Size
+)
+
 type TripleStore struct {
-	session    *mgo.Session
-	db         *mgo.Database
-	hasherSize int
-	makeHasher func() hash.Hash
-	idCache    *IDLru
+	session *mgo.Session
+	db      *mgo.Database
+	idCache *IDLru
 }
 
 func createNewMongoGraph(addr string, options graph.Options) error {
@@ -87,26 +93,26 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro
 	}
 	qs.db = conn.DB(dbName)
 	qs.session = conn
-	qs.hasherSize = sha1.Size
-	qs.makeHasher = sha1.New
 	qs.idCache = NewIDLru(1 << 16)
 	return &qs, nil
 }
 
 func (qs *TripleStore) getIdForTriple(t quad.Quad) string {
-	hasher := qs.makeHasher()
-	id := qs.convertStringToByteHash(t.Subject, hasher)
-	id += qs.convertStringToByteHash(t.Predicate, hasher)
-	id += qs.convertStringToByteHash(t.Object, hasher)
-	id += qs.convertStringToByteHash(t.Label, hasher)
+	id := qs.convertStringToByteHash(t.Subject)
+	id += qs.convertStringToByteHash(t.Predicate)
+	id += qs.convertStringToByteHash(t.Object)
+	id += qs.convertStringToByteHash(t.Label)
 	return id
 }
 
-func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) string {
-	hasher.Reset()
-	key := make([]byte, 0, qs.hasherSize)
-	hasher.Write([]byte(s))
-	key = hasher.Sum(key)
+func (qs *TripleStore) convertStringToByteHash(s string) string {
+	h := hashPool.Get().(hash.Hash)
+	h.Reset()
+	defer hashPool.Put(h)
+
+	key := make([]byte, 0, hashSize)
+	h.Write([]byte(s))
+	key = h.Sum(key)
 	return hex.EncodeToString(key)
 }
 
@@ -246,8 +252,7 @@ func (qs *TripleStore) TriplesAllIterator() graph.Iterator {
 }
 
 func (qs *TripleStore) ValueOf(s string) graph.Value {
-	h := qs.makeHasher()
-	return qs.convertStringToByteHash(s, h)
+	return qs.convertStringToByteHash(s)
 }
 
 func (qs *TripleStore) NameOf(v graph.Value) string {
@@ -292,13 +297,13 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V
 	case quad.Subject:
 		offset = 0
 	case quad.Predicate:
-		offset = (qs.hasherSize * 2)
+		offset = (hashSize * 2)
 	case quad.Object:
-		offset = (qs.hasherSize * 2) * 2
+		offset = (hashSize * 2) * 2
 	case quad.Label:
-		offset = (qs.hasherSize * 2) * 3
+		offset = (hashSize * 2) * 3
 	}
-	val := in.(string)[offset : qs.hasherSize*2+offset]
+	val := in.(string)[offset : hashSize*2+offset]
 	return val
 }
 

From de882b9f93a6f2b9a901c78daf0fc47fefc5bf3d Mon Sep 17 00:00:00 2001
From: Barak Michener
Date: Thu, 14 Aug 2014 20:18:37 -0400
Subject: [PATCH 2/2] port hasher pool to leveldb

---
 graph/leveldb/iterator.go    | 20 +++++++-------
 graph/leveldb/triplestore.go | 65 +++++++++++++++++++++++---------------------
 2 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go
index 28244cc..f87af3b 100644
--- a/graph/leveldb/iterator.go
+++ b/graph/leveldb/iterator.go
@@ -43,7 +43,7 @@ type Iterator struct {
 
 func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleStore) *Iterator {
 	vb := value.(Token)
-	p := make([]byte, 0, 2+qs.hasherSize)
+	p := make([]byte, 0, 2+hashSize)
 	p = append(p, []byte(prefix)...)
 	p = append(p, []byte(vb[1:])...)
 
@@ -169,9 +169,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
 		case quad.Subject:
 			return 2
 		case quad.Predicate:
-			return qs.hasherSize + 2
+			return hashSize + 2
 		case quad.Object:
-			return 2*qs.hasherSize + 2
+			return 2*hashSize + 2
 		case quad.Label:
 			return -1
 		}
@@ -179,11 +179,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
 	if bytes.Equal(prefix, []byte("po")) {
 		switch d {
 		case quad.Subject:
-			return 2*qs.hasherSize + 2
+			return 2*hashSize + 2
 		case quad.Predicate:
 			return 2
 		case quad.Object:
-			return qs.hasherSize + 2
+			return hashSize + 2
 		case quad.Label:
 			return -1
 		}
@@ -191,9 +191,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
 	if bytes.Equal(prefix, []byte("os")) {
 		switch d {
 		case quad.Subject:
-			return qs.hasherSize + 2
+			return hashSize + 2
 		case quad.Predicate:
-			return 2*qs.hasherSize + 2
+			return 2*hashSize + 2
 		case quad.Object:
 			return 2
 		case quad.Label:
@@ -203,11 +203,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
 	if bytes.Equal(prefix, []byte("cp")) {
 		switch d {
 		case quad.Subject:
-			return 2*qs.hasherSize + 2
+			return 2*hashSize + 2
 		case quad.Predicate:
-			return qs.hasherSize + 2
+			return hashSize + 2
 		case quad.Object:
-			return 3*qs.hasherSize + 2
+			return 3*hashSize + 2
 		case quad.Label:
 			return 2
 		}
diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go
index 74626d8..e18e683 100644
--- a/graph/leveldb/triplestore.go
+++ b/graph/leveldb/triplestore.go
@@ -21,6 +21,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"hash"
+	"sync"
 
 	"github.com/barakmich/glog"
 	"github.com/syndtr/goleveldb/leveldb"
@@ -42,6 +43,13 @@ const (
 	DefaultWriteBufferSize = 20
 )
 
+var (
+	hashPool = sync.Pool{
+		New: func() interface{} { return sha1.New() },
+	}
+	hashSize = sha1.Size
+)
+
 type Token []byte
 
 func (t Token) Key() interface{} {
@@ -49,15 +57,13 @@ func (t Token) Key() interface{} {
 }
 
 type TripleStore struct {
-	dbOpts     *opt.Options
-	db         *leveldb.DB
-	path       string
-	open       bool
-	size       int64
-	hasherSize int
-	makeHasher func() hash.Hash
-	writeopts  *opt.WriteOptions
-	readopts   *opt.ReadOptions
+	dbOpts    *opt.Options
+	db        *leveldb.DB
+	path      string
+	open      bool
+	size      int64
+	writeopts *opt.WriteOptions
+	readopts  *opt.ReadOptions
 }
 
 func createNewLevelDB(path string, _ graph.Options) error {
@@ -94,8 +100,6 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro
 		write_buffer_mb = val
 	}
 	qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB
-	qs.hasherSize = sha1.Size
-	qs.makeHasher = sha1.New
 	qs.writeopts = &opt.WriteOptions{
 		Sync: false,
 	}
@@ -125,33 +129,30 @@ func (qs *TripleStore) Size() int64 {
 }
 
 func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
-	hasher := qs.makeHasher()
-	key := make([]byte, 0, 2+(qs.hasherSize*3))
+	key := make([]byte, 0, 2+(hashSize*3))
 	// TODO(kortschak) Remove dependence on String() method.
 	key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
 	return key
 }
 
 func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
-	hasher := qs.makeHasher()
-	key := make([]byte, 0, 2+(qs.hasherSize*4))
+	key := make([]byte, 0, 2+(hashSize*4))
 	// TODO(kortschak) Remove dependence on String() method.
 	key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label), hasher)...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...)
-	key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
+	key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
 	return key
 }
 
 func (qs *TripleStore) createValueKeyFor(s string) []byte {
-	hasher := qs.makeHasher()
-	key := make([]byte, 0, 1+qs.hasherSize)
+	key := make([]byte, 0, 1+hashSize)
 	key = append(key, []byte("z")...)
-	key = append(key, qs.convertStringToByteHash(s, hasher)...)
+	key = append(key, qs.convertStringToByteHash(s)...)
 	return key
 }
 
@@ -336,11 +337,13 @@ func (qs *TripleStore) Quad(k graph.Value) quad.Quad {
 	return triple
 }
 
-func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) []byte {
-	hasher.Reset()
-	key := make([]byte, 0, qs.hasherSize)
-	hasher.Write([]byte(s))
-	key = hasher.Sum(key)
+func (qs *TripleStore) convertStringToByteHash(s string) []byte {
+	h := hashPool.Get().(hash.Hash)
+	h.Reset()
+	defer hashPool.Put(h)
+	key := make([]byte, 0, hashSize)
+	h.Write([]byte(s))
+	key = h.Sum(key)
 	return key
 }
 
@@ -446,7 +449,7 @@ func (qs *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.
 	v := val.(Token)
 	offset := PositionOf(v[0:2], d, qs)
 	if offset != -1 {
-		return Token(append([]byte("z"), v[offset:offset+qs.hasherSize]...))
+		return Token(append([]byte("z"), v[offset:offset+hashSize]...))
 	} else {
 		return Token(qs.Quad(val).Get(d))
 	}