From 104e7d110da3aadcf44bc2e3a44f275770d386a2 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 13 Aug 2014 13:08:50 -0400 Subject: [PATCH 1/3] fix leveldb (and speed up hasa) --- graph/iterator/hasa_iterator.go | 3 +-- graph/leveldb/iterator.go | 20 +++++++-------- graph/leveldb/triplestore.go | 57 ++++++++++++++++++++++------------------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index 99dec4f..b88f58b 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -202,8 +202,7 @@ func (it *HasA) Next() bool { return graph.NextLogOut(it, 0, false) } tID := it.primaryIt.Result() - name := it.ts.Quad(tID).Get(it.dir) - val := it.ts.ValueOf(name) + val := it.ts.TripleDirection(tID, it.dir) it.result = val return graph.NextLogOut(it, val, true) } diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index 4a1172c..035224c 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -43,7 +43,7 @@ type Iterator struct { func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleStore) *Iterator { vb := value.(Token) - p := make([]byte, 0, 2+qs.hasher.Size()) + p := make([]byte, 0, 2+qs.hasher_size) p = append(p, []byte(prefix)...) p = append(p, []byte(vb[1:])...) @@ -169,9 +169,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { case quad.Subject: return 2 case quad.Predicate: - return qs.hasher.Size() + 2 + return qs.hasher_size + 2 case quad.Object: - return 2*qs.hasher.Size() + 2 + return 2*qs.hasher_size + 2 case quad.Label: return -1 } @@ -179,11 +179,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("po")) { switch d { case quad.Subject: - return 2*qs.hasher.Size() + 2 + return 2*qs.hasher_size + 2 case quad.Predicate: return 2 case quad.Object: - return qs.hasher.Size() + 2 + return qs.hasher_size + 2 case quad.Label: return -1 } @@ -191,9 +191,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("os")) { switch d { case quad.Subject: - return qs.hasher.Size() + 2 + return qs.hasher_size + 2 case quad.Predicate: - return 2*qs.hasher.Size() + 2 + return 2*qs.hasher_size + 2 case quad.Object: return 2 case quad.Label: @@ -203,11 +203,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("cp")) { switch d { case quad.Subject: - return 2*qs.hasher.Size() + 2 + return 2*qs.hasher_size + 2 case quad.Predicate: - return qs.hasher.Size() + 2 + return qs.hasher_size + 2 case quad.Object: - return 3*qs.hasher.Size() + 2 + return 3*qs.hasher_size + 2 case quad.Label: return 2 } diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index 7efb03f..71304df 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -49,14 +49,15 @@ func (t Token) Key() interface{} { } type TripleStore struct { - dbOpts *opt.Options - db *leveldb.DB - path string - open bool - size int64 - hasher hash.Hash - writeopts *opt.WriteOptions - readopts *opt.ReadOptions + dbOpts *opt.Options + db *leveldb.DB + path string + open bool + size int64 + hasher_size int + make_hasher func() hash.Hash + writeopts *opt.WriteOptions + readopts *opt.ReadOptions } func createNewLevelDB(path string, _ graph.Options) error { @@ -93,7 +94,8 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro write_buffer_mb = val } qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB - qs.hasher = sha1.New() + qs.hasher_size = sha1.Size + qs.make_hasher = func() hash.Hash { return sha1.New() } qs.writeopts = &opt.WriteOptions{ Sync: false, } @@ -123,30 +125,33 @@ func (qs *TripleStore) Size() int64 { } func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { - key := make([]byte, 0, 2+(qs.hasher.Size()*3)) + hasher := qs.make_hasher() + key := make([]byte, 0, 2+(qs.hasher_size*3)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...) return key } func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { - key := make([]byte, 0, 2+(qs.hasher.Size()*4)) + hasher := qs.make_hasher() + key := make([]byte, 0, 2+(qs.hasher_size*4)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...) - key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) + key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label), hasher)...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[1]), hasher)...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[2]), hasher)...) return key } func (qs *TripleStore) createValueKeyFor(s string) []byte { - key := make([]byte, 0, 1+qs.hasher.Size()) + hasher := qs.make_hasher() + key := make([]byte, 0, 1+qs.hasher_size) key = append(key, []byte("z")...) - key = append(key, qs.convertStringToByteHash(s)...) + key = append(key, qs.convertStringToByteHash(s, hasher)...) return key } @@ -331,11 +336,11 @@ func (qs *TripleStore) Quad(k graph.Value) quad.Quad { return triple } -func (qs *TripleStore) convertStringToByteHash(s string) []byte { - qs.hasher.Reset() - key := make([]byte, 0, qs.hasher.Size()) - qs.hasher.Write([]byte(s)) - key = qs.hasher.Sum(key) +func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) []byte { + hasher.Reset() + key := make([]byte, 0, qs.hasher_size) + hasher.Write([]byte(s)) + key = hasher.Sum(key) return key } @@ -441,7 +446,7 @@ func (qs *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph. v := val.(Token) offset := PositionOf(v[0:2], d, qs) if offset != -1 { - return Token(append([]byte("z"), v[offset:offset+qs.hasher.Size()]...)) + return Token(append([]byte("z"), v[offset:offset+qs.hasher_size]...)) } else { return Token(qs.Quad(val).Get(d)) } From 03798bc4fa0d0c7076ba2aa8a91bbdd0797b81d8 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 13 Aug 2014 13:29:08 -0400 Subject: [PATCH 2/3] fix mongo hasher --- graph/mongo/iterator.go | 8 ++++---- graph/mongo/triplestore.go | 42 +++++++++++++++++++++++------------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index 9e37089..0c54910 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -185,13 +185,13 @@ func (it *Iterator) Contains(v graph.Value) bool { case quad.Subject: offset = 0 case quad.Predicate: - offset = (it.qs.hasher.Size() * 2) + offset = (it.qs.hasher_size * 2) case quad.Object: - offset = (it.qs.hasher.Size() * 2) * 2 + offset = (it.qs.hasher_size * 2) * 2 case quad.Label: - offset = (it.qs.hasher.Size() * 2) * 3 + offset = (it.qs.hasher_size * 2) * 3 } - val := v.(string)[offset : it.qs.hasher.Size()*2+offset] + val := v.(string)[offset : it.qs.hasher_size*2+offset] if val == it.hash { it.result = v return graph.ContainsLogOut(it, v, true) diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 364d195..9df314d 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -39,10 +39,11 @@ var _ graph.BulkLoader = (*TripleStore)(nil) const DefaultDBName = "cayley" type TripleStore struct { - session *mgo.Session - db *mgo.Database - hasher hash.Hash - idCache *IDLru + session *mgo.Session + db *mgo.Database + hasher_size int + make_hasher func() hash.Hash + idCache *IDLru } func createNewMongoGraph(addr string, options graph.Options) error { @@ -86,24 +87,26 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro } qs.db = conn.DB(dbName) qs.session = conn - qs.hasher = sha1.New() + qs.hasher_size = sha1.Size + qs.make_hasher = func() hash.Hash { return sha1.New() } qs.idCache = NewIDLru(1 << 16) return &qs, nil } func (qs *TripleStore) getIdForTriple(t quad.Quad) string { - id := qs.ConvertStringToByteHash(t.Subject) - id += qs.ConvertStringToByteHash(t.Predicate) - id += qs.ConvertStringToByteHash(t.Object) - id += qs.ConvertStringToByteHash(t.Label) + hasher := qs.make_hasher() + id := qs.convertStringToByteHash(t.Subject, hasher) + id += qs.convertStringToByteHash(t.Predicate, hasher) + id += qs.convertStringToByteHash(t.Object, hasher) + id += qs.convertStringToByteHash(t.Label, hasher) return id } -func (qs *TripleStore) ConvertStringToByteHash(s string) string { - qs.hasher.Reset() - key := make([]byte, 0, qs.hasher.Size()) - qs.hasher.Write([]byte(s)) - key = qs.hasher.Sum(key) +func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) string { + hasher.Reset() + key := make([]byte, 0, qs.hasher_size) + hasher.Write([]byte(s)) + key = hasher.Sum(key) return hex.EncodeToString(key) } @@ -243,7 +246,8 @@ func (qs *TripleStore) TriplesAllIterator() graph.Iterator { } func (qs *TripleStore) ValueOf(s string) graph.Value { - return qs.ConvertStringToByteHash(s) + h := qs.make_hasher() + return qs.convertStringToByteHash(s, h) } func (qs *TripleStore) NameOf(v graph.Value) string { @@ -288,13 +292,13 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V case quad.Subject: offset = 0 case quad.Predicate: - offset = (qs.hasher.Size() * 2) + offset = (qs.hasher_size * 2) case quad.Object: - offset = (qs.hasher.Size() * 2) * 2 + offset = (qs.hasher_size * 2) * 2 case quad.Label: - offset = (qs.hasher.Size() * 2) * 3 + offset = (qs.hasher_size * 2) * 3 } - val := in.(string)[offset : qs.hasher.Size()*2+offset] + val := in.(string)[offset : qs.hasher_size*2+offset] return val } From cfca7db47c1633a2b0ccb8c065ff88c6366279be Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 13 Aug 2014 14:54:36 -0400 Subject: [PATCH 3/3] Go style My C++ programmer's disease was acting up. --- graph/leveldb/iterator.go | 20 ++++++++++---------- graph/leveldb/triplestore.go | 38 +++++++++++++++++++------------------- graph/mongo/iterator.go | 8 ++++---- graph/mongo/triplestore.go | 28 ++++++++++++++-------------- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index 035224c..28244cc 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -43,7 +43,7 @@ type Iterator struct { func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleStore) *Iterator { vb := value.(Token) - p := make([]byte, 0, 2+qs.hasher_size) + p := make([]byte, 0, 2+qs.hasherSize) p = append(p, []byte(prefix)...) p = append(p, []byte(vb[1:])...) @@ -169,9 +169,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { case quad.Subject: return 2 case quad.Predicate: - return qs.hasher_size + 2 + return qs.hasherSize + 2 case quad.Object: - return 2*qs.hasher_size + 2 + return 2*qs.hasherSize + 2 case quad.Label: return -1 } @@ -179,11 +179,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("po")) { switch d { case quad.Subject: - return 2*qs.hasher_size + 2 + return 2*qs.hasherSize + 2 case quad.Predicate: return 2 case quad.Object: - return qs.hasher_size + 2 + return qs.hasherSize + 2 case quad.Label: return -1 } @@ -191,9 +191,9 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("os")) { switch d { case quad.Subject: - return qs.hasher_size + 2 + return qs.hasherSize + 2 case quad.Predicate: - return 2*qs.hasher_size + 2 + return 2*qs.hasherSize + 2 case quad.Object: return 2 case quad.Label: @@ -203,11 +203,11 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { if bytes.Equal(prefix, []byte("cp")) { switch d { case quad.Subject: - return 2*qs.hasher_size + 2 + return 2*qs.hasherSize + 2 case quad.Predicate: - return qs.hasher_size + 2 + return qs.hasherSize + 2 case quad.Object: - return 3*qs.hasher_size + 2 + return 3*qs.hasherSize + 2 case quad.Label: return 2 } diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index 71304df..74626d8 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -49,15 +49,15 @@ func (t Token) Key() interface{} { } type TripleStore struct { - dbOpts *opt.Options - db *leveldb.DB - path string - open bool - size int64 - hasher_size int - make_hasher func() hash.Hash - writeopts *opt.WriteOptions - readopts *opt.ReadOptions + dbOpts *opt.Options + db *leveldb.DB + path string + open bool + size int64 + hasherSize int + makeHasher func() hash.Hash + writeopts *opt.WriteOptions + readopts *opt.ReadOptions } func createNewLevelDB(path string, _ graph.Options) error { @@ -94,8 +94,8 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro write_buffer_mb = val } qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB - qs.hasher_size = sha1.Size - qs.make_hasher = func() hash.Hash { return sha1.New() } + qs.hasherSize = sha1.Size + qs.makeHasher = sha1.New qs.writeopts = &opt.WriteOptions{ Sync: false, } @@ -125,8 +125,8 @@ func (qs *TripleStore) Size() int64 { } func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { - hasher := qs.make_hasher() - key := make([]byte, 0, 2+(qs.hasher_size*3)) + hasher := qs.makeHasher() + key := make([]byte, 0, 2+(qs.hasherSize*3)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]), hasher)...) @@ -136,8 +136,8 @@ func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byt } func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { - hasher := qs.make_hasher() - key := make([]byte, 0, 2+(qs.hasher_size*4)) + hasher := qs.makeHasher() + key := make([]byte, 0, 2+(qs.hasherSize*4)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...) key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label), hasher)...) @@ -148,8 +148,8 @@ func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) [ } func (qs *TripleStore) createValueKeyFor(s string) []byte { - hasher := qs.make_hasher() - key := make([]byte, 0, 1+qs.hasher_size) + hasher := qs.makeHasher() + key := make([]byte, 0, 1+qs.hasherSize) key = append(key, []byte("z")...) key = append(key, qs.convertStringToByteHash(s, hasher)...) return key @@ -338,7 +338,7 @@ func (qs *TripleStore) Quad(k graph.Value) quad.Quad { func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) []byte { hasher.Reset() - key := make([]byte, 0, qs.hasher_size) + key := make([]byte, 0, qs.hasherSize) hasher.Write([]byte(s)) key = hasher.Sum(key) return key @@ -446,7 +446,7 @@ func (qs *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph. v := val.(Token) offset := PositionOf(v[0:2], d, qs) if offset != -1 { - return Token(append([]byte("z"), v[offset:offset+qs.hasher_size]...)) + return Token(append([]byte("z"), v[offset:offset+qs.hasherSize]...)) } else { return Token(qs.Quad(val).Get(d)) } diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index 0c54910..03a8560 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -185,13 +185,13 @@ func (it *Iterator) Contains(v graph.Value) bool { case quad.Subject: offset = 0 case quad.Predicate: - offset = (it.qs.hasher_size * 2) + offset = (it.qs.hasherSize * 2) case quad.Object: - offset = (it.qs.hasher_size * 2) * 2 + offset = (it.qs.hasherSize * 2) * 2 case quad.Label: - offset = (it.qs.hasher_size * 2) * 3 + offset = (it.qs.hasherSize * 2) * 3 } - val := v.(string)[offset : it.qs.hasher_size*2+offset] + val := v.(string)[offset : it.qs.hasherSize*2+offset] if val == it.hash { it.result = v return graph.ContainsLogOut(it, v, true) diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 9df314d..6c2a334 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -39,11 +39,11 @@ var _ graph.BulkLoader = (*TripleStore)(nil) const DefaultDBName = "cayley" type TripleStore struct { - session *mgo.Session - db *mgo.Database - hasher_size int - make_hasher func() hash.Hash - idCache *IDLru + session *mgo.Session + db *mgo.Database + hasherSize int + makeHasher func() hash.Hash + idCache *IDLru } func createNewMongoGraph(addr string, options graph.Options) error { @@ -87,14 +87,14 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro } qs.db = conn.DB(dbName) qs.session = conn - qs.hasher_size = sha1.Size - qs.make_hasher = func() hash.Hash { return sha1.New() } + qs.hasherSize = sha1.Size + qs.makeHasher = sha1.New qs.idCache = NewIDLru(1 << 16) return &qs, nil } func (qs *TripleStore) getIdForTriple(t quad.Quad) string { - hasher := qs.make_hasher() + hasher := qs.makeHasher() id := qs.convertStringToByteHash(t.Subject, hasher) id += qs.convertStringToByteHash(t.Predicate, hasher) id += qs.convertStringToByteHash(t.Object, hasher) @@ -104,7 +104,7 @@ func (qs *TripleStore) getIdForTriple(t quad.Quad) string { func (qs *TripleStore) convertStringToByteHash(s string, hasher hash.Hash) string { hasher.Reset() - key := make([]byte, 0, qs.hasher_size) + key := make([]byte, 0, qs.hasherSize) hasher.Write([]byte(s)) key = hasher.Sum(key) return hex.EncodeToString(key) @@ -246,7 +246,7 @@ func (qs *TripleStore) TriplesAllIterator() graph.Iterator { } func (qs *TripleStore) ValueOf(s string) graph.Value { - h := qs.make_hasher() + h := qs.makeHasher() return qs.convertStringToByteHash(s, h) } @@ -292,13 +292,13 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V case quad.Subject: offset = 0 case quad.Predicate: - offset = (qs.hasher_size * 2) + offset = (qs.hasherSize * 2) case quad.Object: - offset = (qs.hasher_size * 2) * 2 + offset = (qs.hasherSize * 2) * 2 case quad.Label: - offset = (qs.hasher_size * 2) * 3 + offset = (qs.hasherSize * 2) * 3 } - val := in.(string)[offset : qs.hasher_size*2+offset] + val := in.(string)[offset : qs.hasherSize*2+offset] return val }