cayley/graph/leveldb/triplestore.go
kortschak 6acfdcc5d6 Use concrete value for quad.Quad
Comparison of -short benchmarks in cayley.

$ benchcmp pointer.bench concrete.bench
benchmark                                   old ns/op     new ns/op	delta
BenchmarkNamePredicate                      1673276       1655093	-1.09%
BenchmarkLargeSetsNoIntersection            318985907     261499984	-18.02%
BenchmarkNetAndSpeed                        104403743     41516981	-60.23%
BenchmarkKeanuAndNet                        17309258      16857513	-2.61%
BenchmarkKeanuAndSpeed                      20159161      19282833	-4.35%

Comparison of pathological cases are not so happy.

benchmark                                   old ns/op       new ns/op		delta
BenchmarkVeryLargeSetsSmallIntersection     55269775527     246084606672	+345.24%
BenchmarkHelplessContainsChecker            23436501319     24308906949		+3.72%

Profiling the worst case:

Pointer:
Total: 6121 samples
    1973  32.2%  32.2%     1973  32.2% runtime.findfunc
     773  12.6%  44.9%      773  12.6% readvarint
     510   8.3%  53.2%      511   8.3% step
     409   6.7%  59.9%      410   6.7% runtime.gentraceback
     390   6.4%  66.2%      391   6.4% pcvalue
     215   3.5%  69.8%      215   3.5% runtime.funcdata
     181   3.0%  72.7%      181   3.0% checkframecopy
     118   1.9%  74.6%      119   1.9% runtime.funcspdelta
      96   1.6%  76.2%       96   1.6% runtime.topofstack
      76   1.2%  77.5%       76   1.2% scanblock

Concrete:
Total: 25027 samples
    9437  37.7%  37.7%     9437  37.7% runtime.findfunc
    3853  15.4%  53.1%     3853  15.4% readvarint
    2366   9.5%  62.6%     2366   9.5% step
    2186   8.7%  71.3%     2186   8.7% runtime.gentraceback
    1816   7.3%  78.5%     1816   7.3% pcvalue
    1016   4.1%  82.6%     1016   4.1% runtime.funcdata
     859   3.4%  86.0%      859   3.4% checkframecopy
     506   2.0%  88.1%      506   2.0% runtime.funcspdelta
     410   1.6%  89.7%      410   1.6% runtime.topofstack
     303   1.2%  90.9%      303   1.2% runtime.newstack
2014-08-05 23:25:02 +09:30

450 lines
12 KiB
Go

// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package leveldb
import (
"bytes"
"crypto/sha1"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"github.com/barakmich/glog"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
)
func init() {
graph.RegisterTripleStore("leveldb", newTripleStore, createNewLevelDB)
}
const (
DefaultCacheSize = 2
DefaultWriteBufferSize = 20
)
type TripleStore struct {
dbOpts *opt.Options
db *leveldb.DB
path string
open bool
size int64
hasher hash.Hash
writeopts *opt.WriteOptions
readopts *opt.ReadOptions
}
func createNewLevelDB(path string, _ graph.Options) error {
opts := &opt.Options{}
db, err := leveldb.OpenFile(path, opts)
if err != nil {
glog.Errorf("Error: couldn't create database: %v", err)
return err
}
defer db.Close()
qs := &TripleStore{}
qs.db = db
qs.writeopts = &opt.WriteOptions{
Sync: true,
}
qs.Close()
return nil
}
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) {
var qs TripleStore
qs.path = path
cache_size := DefaultCacheSize
if val, ok := options.IntKey("cache_size_mb"); ok {
cache_size = val
}
qs.dbOpts = &opt.Options{
BlockCache: cache.NewLRUCache(cache_size * opt.MiB),
}
qs.dbOpts.ErrorIfMissing = true
write_buffer_mb := DefaultWriteBufferSize
if val, ok := options.IntKey("write_buffer_mb"); ok {
write_buffer_mb = val
}
qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB
qs.hasher = sha1.New()
qs.writeopts = &opt.WriteOptions{
Sync: false,
}
qs.readopts = &opt.ReadOptions{}
db, err := leveldb.OpenFile(qs.path, qs.dbOpts)
if err != nil {
panic("Error, couldn't open! " + err.Error())
}
qs.db = db
glog.Infoln(qs.GetStats())
qs.getSize()
return &qs, nil
}
func (qs *TripleStore) GetStats() string {
out := ""
stats, err := qs.db.GetProperty("leveldb.stats")
if err == nil {
out += fmt.Sprintln("Stats: ", stats)
}
out += fmt.Sprintln("Size: ", qs.size)
return out
}
func (qs *TripleStore) Size() int64 {
return qs.size
}
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(qs.hasher.Size()*3))
// TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key
}
func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(qs.hasher.Size()*4))
// TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key
}
func (qs *TripleStore) createValueKeyFor(s string) []byte {
key := make([]byte, 0, 1+qs.hasher.Size())
key = append(key, []byte("z")...)
key = append(key, qs.convertStringToByteHash(s)...)
return key
}
func (qs *TripleStore) AddTriple(t quad.Quad) {
batch := &leveldb.Batch{}
qs.buildWrite(batch, t)
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Errorf("Couldn't write to DB for triple %s.", t)
return
}
qs.size++
}
// Short hand for direction permutations.
var (
spo = [3]quad.Direction{quad.Subject, quad.Predicate, quad.Object}
osp = [3]quad.Direction{quad.Object, quad.Subject, quad.Predicate}
pos = [3]quad.Direction{quad.Predicate, quad.Object, quad.Subject}
pso = [3]quad.Direction{quad.Predicate, quad.Subject, quad.Object}
)
func (qs *TripleStore) RemoveTriple(t quad.Quad) {
_, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Couldn't access DB to confirm deletion")
return
}
if err == leveldb.ErrNotFound {
// No such triple in the database, forget about it.
return
}
batch := &leveldb.Batch{}
batch.Delete(qs.createKeyFor(spo, t))
batch.Delete(qs.createKeyFor(osp, t))
batch.Delete(qs.createKeyFor(pos, t))
qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch)
qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch)
qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch)
if t.Get(quad.Label) != "" {
batch.Delete(qs.createProvKeyFor(pso, t))
qs.UpdateValueKeyBy(t.Get(quad.Label), -1, batch)
}
err = qs.db.Write(batch, nil)
if err != nil {
glog.Errorf("Couldn't delete triple %s.", t)
return
}
qs.size--
}
func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) {
bytes, err := json.Marshal(t)
if err != nil {
glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err)
return
}
batch.Put(qs.createKeyFor(spo, t), bytes)
batch.Put(qs.createKeyFor(osp, t), bytes)
batch.Put(qs.createKeyFor(pos, t), bytes)
if t.Get(quad.Label) != "" {
batch.Put(qs.createProvKeyFor(pso, t), bytes)
}
}
func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) {
qs.buildTripleWrite(batch, t)
qs.UpdateValueKeyBy(t.Get(quad.Subject), 1, nil)
qs.UpdateValueKeyBy(t.Get(quad.Predicate), 1, nil)
qs.UpdateValueKeyBy(t.Get(quad.Object), 1, nil)
if t.Get(quad.Label) != "" {
qs.UpdateValueKeyBy(t.Get(quad.Label), 1, nil)
}
}
type ValueData struct {
Name string
Size int64
}
func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.Batch) {
value := &ValueData{name, int64(amount)}
key := qs.createValueKeyFor(name)
b, err := qs.db.Get(key, qs.readopts)
// Error getting the node from the database.
if err != nil && err != leveldb.ErrNotFound {
glog.Errorf("Error reading Value %s from the DB.", name)
return
}
// Node exists in the database -- unmarshal and update.
if b != nil && err != leveldb.ErrNotFound {
err = json.Unmarshal(b, value)
if err != nil {
glog.Errorf("Error: couldn't reconstruct value: %v", err)
return
}
value.Size += int64(amount)
}
// Are we deleting something?
if amount < 0 {
if value.Size <= 0 {
if batch == nil {
qs.db.Delete(key, qs.writeopts)
} else {
batch.Delete(key)
}
return
}
}
// Repackage and rewrite.
bytes, err := json.Marshal(&value)
if err != nil {
glog.Errorf("Couldn't write to buffer for value %s: %s", name, err)
return
}
if batch == nil {
qs.db.Put(key, bytes, qs.writeopts)
} else {
batch.Put(key, bytes)
}
}
func (qs *TripleStore) AddTripleSet(t_s []quad.Quad) {
batch := &leveldb.Batch{}
newTs := len(t_s)
resizeMap := make(map[string]int)
for _, t := range t_s {
qs.buildTripleWrite(batch, t)
resizeMap[t.Subject]++
resizeMap[t.Predicate]++
resizeMap[t.Object]++
if t.Label != "" {
resizeMap[t.Label]++
}
}
for k, v := range resizeMap {
qs.UpdateValueKeyBy(k, v, batch)
}
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Error("Couldn't write to DB for tripleset.")
return
}
qs.size += int64(newTs)
}
func (qs *TripleStore) Close() {
buf := new(bytes.Buffer)
err := binary.Write(buf, binary.LittleEndian, qs.size)
if err == nil {
werr := qs.db.Put([]byte("__size"), buf.Bytes(), qs.writeopts)
if werr != nil {
glog.Error("Couldn't write size before closing!")
}
} else {
glog.Errorf("Couldn't convert size before closing!")
}
qs.db.Close()
qs.open = false
}
func (qs *TripleStore) Quad(k graph.Value) quad.Quad {
var triple quad.Quad
b, err := qs.db.Get(k.([]byte), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Error: couldn't get triple from DB.")
return quad.Quad{}
}
if err == leveldb.ErrNotFound {
// No harm, no foul.
return quad.Quad{}
}
err = json.Unmarshal(b, &triple)
if err != nil {
glog.Error("Error: couldn't reconstruct triple.")
return quad.Quad{}
}
return triple
}
func (qs *TripleStore) convertStringToByteHash(s string) []byte {
qs.hasher.Reset()
key := make([]byte, 0, qs.hasher.Size())
qs.hasher.Write([]byte(s))
key = qs.hasher.Sum(key)
return key
}
func (qs *TripleStore) ValueOf(s string) graph.Value {
return qs.createValueKeyFor(s)
}
func (qs *TripleStore) valueData(value_key []byte) ValueData {
var out ValueData
if glog.V(3) {
glog.V(3).Infof("%s %v", string(value_key[0]), value_key)
}
b, err := qs.db.Get(value_key, qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Errorln("Error: couldn't get value from DB")
return out
}
if b != nil && err != leveldb.ErrNotFound {
err = json.Unmarshal(b, &out)
if err != nil {
glog.Errorln("Error: couldn't reconstruct value")
return ValueData{}
}
}
return out
}
func (qs *TripleStore) NameOf(k graph.Value) string {
if k == nil {
glog.V(2).Info("k was nil")
return ""
}
return qs.valueData(k.([]byte)).Name
}
func (qs *TripleStore) SizeOf(k graph.Value) int64 {
if k == nil {
return 0
}
return int64(qs.valueData(k.([]byte)).Size)
}
func (qs *TripleStore) getSize() {
var size int64
b, err := qs.db.Get([]byte("__size"), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
panic("Couldn't read size " + err.Error())
}
if err == leveldb.ErrNotFound {
// Must be a new database. Cool
qs.size = 0
return
}
buf := bytes.NewBuffer(b)
err = binary.Read(buf, binary.LittleEndian, &size)
if err != nil {
glog.Errorln("Error: couldn't parse size")
}
qs.size = size
}
func (qs *TripleStore) SizeOfPrefix(pre []byte) (int64, error) {
limit := make([]byte, len(pre))
copy(limit, pre)
end := len(limit) - 1
limit[end]++
ranges := make([]util.Range, 1)
ranges[0].Start = pre
ranges[0].Limit = limit
sizes, err := qs.db.SizeOf(ranges)
if err == nil {
return (int64(sizes[0]) >> 6) + 1, nil
}
return 0, nil
}
func (qs *TripleStore) TripleIterator(d quad.Direction, val graph.Value) graph.Iterator {
var prefix string
switch d {
case quad.Subject:
prefix = "sp"
case quad.Predicate:
prefix = "po"
case quad.Object:
prefix = "os"
case quad.Label:
prefix = "cp"
default:
panic("unreachable " + d.String())
}
return NewIterator(prefix, d, val, qs)
}
func (qs *TripleStore) NodesAllIterator() graph.Iterator {
return NewAllIterator("z", quad.Any, qs)
}
func (qs *TripleStore) TriplesAllIterator() graph.Iterator {
return NewAllIterator("po", quad.Predicate, qs)
}
func (qs *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.Value {
v := val.([]uint8)
offset := PositionOf(v[0:2], d, qs)
if offset != -1 {
return append([]byte("z"), v[offset:offset+qs.hasher.Size()]...)
} else {
return qs.Quad(val).Get(d)
}
}
func compareBytes(a, b graph.Value) bool {
return bytes.Equal(a.([]uint8), b.([]uint8))
}
func (qs *TripleStore) FixedIterator() graph.FixedIterator {
return iterator.NewFixedIteratorWithCompare(compareBytes)
}