Use cznic/b for index store

$ benchcmp gollrb.bench b-gen.bench
benchmark                                   old ns/op       new ns/op	delta
BenchmarkNamePredicate                      1731218         1693373		-2.19%
BenchmarkLargeSetsNoIntersection            81290360        70205277	-13.64%
BenchmarkVeryLargeSetsSmallIntersection     768135620       442906243	-42.34%
BenchmarkHelplessContainsChecker            39477086024     35260603748	-10.68%
BenchmarkNetAndSpeed                        22510637        21587975	-4.10%
BenchmarkKeanuAndNet                        18018886        17795328	-1.24%
BenchmarkKeanuAndSpeed                      20336586        20560228	+1.10%
BenchmarkKeanuOther                         85495040        80718152	-5.59%
BenchmarkKeanuBullockOther                  95457792        83868434	-12.14%

Code gen from $GOPATH/src/github.com/cznic/b:

  make generic \
| sed -e 's/KEY/int64/g' -e 's/VALUE/struct{}/g' \
> $GOPATH/src/github.com/google/cayley/graph/memstore/b/keys.go

key_test.go manually edited.
This commit is contained in:
kortschak 2014-08-10 12:22:07 +09:30
parent aad21b0585
commit 2540ea8f87
4 changed files with 1492 additions and 114 deletions

972
graph/memstore/b/keys.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,396 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package b
import (
"math"
"runtime/debug"
"testing"
"github.com/cznic/mathutil"
)
// rng builds the mathutil.FC32 generator that supplies keys to the random-key
// benchmarks. The generator is constructed over the bounds math.MinInt32/4 and
// math.MaxInt32/4 with the third NewFC32 argument set to false. A construction
// error is a setup failure and panics.
func rng() *mathutil.FC32 {
	const (
		lo = math.MinInt32 / 4
		hi = math.MaxInt32 / 4
	)
	gen, err := mathutil.NewFC32(lo, hi, false)
	if err == nil {
		return gen
	}
	panic(err)
}
// cmp is the three-way comparator handed to TreeNew for int64 keys. It returns
// a negative value when a < b, zero when a == b and a positive value when
// a > b.
//
// The comparison is done explicitly rather than by returning int(a - b): the
// subtraction can overflow int64 for keys that are far apart, and the
// conversion to int truncates on platforms where int is 32 bits wide. Either
// effect can produce the wrong sign, or a spurious zero for distinct keys.
func cmp(a, b int64) int {
	switch {
	case a < b:
		return -1
	case a > b:
		return 1
	default:
		return 0
	}
}
// BenchmarkSetSeq1e3 runs benchmarkSetSeq with 1,000 keys.
func BenchmarkSetSeq1e3(b *testing.B) { benchmarkSetSeq(b, 1000) }

// BenchmarkSetSeq1e4 runs benchmarkSetSeq with 10,000 keys.
func BenchmarkSetSeq1e4(b *testing.B) { benchmarkSetSeq(b, 10000) }

// BenchmarkSetSeq1e5 runs benchmarkSetSeq with 100,000 keys.
func BenchmarkSetSeq1e5(b *testing.B) { benchmarkSetSeq(b, 100000) }

// BenchmarkSetSeq1e6 runs benchmarkSetSeq with 1,000,000 keys.
func BenchmarkSetSeq1e6(b *testing.B) { benchmarkSetSeq(b, 1000000) }
// benchmarkSetSeq times inserting the keys 0..n-1, in ascending order, into a
// freshly created tree. Only the Set calls are timed: creating the tree, the
// debug.FreeOSMemory call and closing the tree all happen while the timer is
// stopped.
func benchmarkSetSeq(b *testing.B, n int) {
	limit := int64(n)
	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		// Untimed setup for this run.
		b.StopTimer()
		tree := TreeNew(cmp)
		debug.FreeOSMemory()

		// Timed section: n ascending insertions.
		b.StartTimer()
		for key := int64(0); key < limit; key++ {
			tree.Set(key, struct{}{})
		}
		b.StopTimer()

		// Untimed teardown.
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkGetSeq1e3 runs benchmarkGetSeq with 1,000 keys.
func BenchmarkGetSeq1e3(b *testing.B) { benchmarkGetSeq(b, 1000) }

// BenchmarkGetSeq1e4 runs benchmarkGetSeq with 10,000 keys.
func BenchmarkGetSeq1e4(b *testing.B) { benchmarkGetSeq(b, 10000) }

// BenchmarkGetSeq1e5 runs benchmarkGetSeq with 100,000 keys.
func BenchmarkGetSeq1e5(b *testing.B) { benchmarkGetSeq(b, 100000) }

// BenchmarkGetSeq1e6 runs benchmarkGetSeq with 1,000,000 keys.
func BenchmarkGetSeq1e6(b *testing.B) { benchmarkGetSeq(b, 1000000) }
// benchmarkGetSeq times looking up the keys 0..n-1, in ascending order, in a
// tree that was filled with exactly those keys before the timer was reset.
// The same tree is reused for all b.N runs and closed after the timer stops.
func benchmarkGetSeq(b *testing.B, n int) {
	limit := int64(n)

	// Untimed setup: build the tree once.
	tree := TreeNew(cmp)
	for key := int64(0); key < limit; key++ {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		for key := int64(0); key < limit; key++ {
			tree.Get(key)
		}
	}
	b.StopTimer()

	tree.Close()
}
// BenchmarkSetRnd1e3 runs benchmarkSetRnd with 1,000 keys.
func BenchmarkSetRnd1e3(b *testing.B) { benchmarkSetRnd(b, 1000) }

// BenchmarkSetRnd1e4 runs benchmarkSetRnd with 10,000 keys.
func BenchmarkSetRnd1e4(b *testing.B) { benchmarkSetRnd(b, 10000) }

// BenchmarkSetRnd1e5 runs benchmarkSetRnd with 100,000 keys.
func BenchmarkSetRnd1e5(b *testing.B) { benchmarkSetRnd(b, 100000) }

// BenchmarkSetRnd1e6 runs benchmarkSetRnd with 1,000,000 keys.
func BenchmarkSetRnd1e6(b *testing.B) { benchmarkSetRnd(b, 1000000) }
// benchmarkSetRnd times inserting n keys drawn from rng() into a freshly
// created tree. The keys are generated once, before the timer is reset, and
// the same key sequence is replayed on every run.
//
// The keys are converted to int64 while they are generated, so the timed loop
// contains nothing but the Set calls. This matches the other random-key
// benchmarks in this file, which also pre-convert their keys.
func benchmarkSetRnd(b *testing.B, n int) {
	gen := rng()
	keys := make([]int64, n)
	for i := range keys {
		keys[i] = int64(gen.Next())
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Untimed setup for this run.
		b.StopTimer()
		r := TreeNew(cmp)
		debug.FreeOSMemory()
		b.StartTimer()
		for _, k := range keys {
			r.Set(k, struct{}{})
		}
		b.StopTimer()
		// Untimed teardown.
		r.Close()
	}
	b.StopTimer()
}
// BenchmarkGetRnd1e3 runs benchmarkGetRnd with 1,000 keys.
func BenchmarkGetRnd1e3(b *testing.B) { benchmarkGetRnd(b, 1000) }

// BenchmarkGetRnd1e4 runs benchmarkGetRnd with 10,000 keys.
func BenchmarkGetRnd1e4(b *testing.B) { benchmarkGetRnd(b, 10000) }

// BenchmarkGetRnd1e5 runs benchmarkGetRnd with 100,000 keys.
func BenchmarkGetRnd1e5(b *testing.B) { benchmarkGetRnd(b, 100000) }

// BenchmarkGetRnd1e6 runs benchmarkGetRnd with 1,000,000 keys.
func BenchmarkGetRnd1e6(b *testing.B) { benchmarkGetRnd(b, 1000000) }
// benchmarkGetRnd times looking up n keys drawn from rng() in a tree that was
// filled with those same keys before the timer was reset. Lookups are issued
// in the order the keys were generated. The tree is shared by all b.N runs and
// closed after the timer stops.
func benchmarkGetRnd(b *testing.B, n int) {
	tree := TreeNew(cmp)
	gen := rng()

	// Untimed setup: generate the keys, then load them into the tree.
	keys := make([]int64, n)
	for i := 0; i < len(keys); i++ {
		keys[i] = int64(gen.Next())
	}
	for i := 0; i < len(keys); i++ {
		tree.Set(keys[i], struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		for _, key := range keys {
			tree.Get(key)
		}
	}
	b.StopTimer()

	tree.Close()
}
// BenchmarkDelSeq1e3 runs benchmarkDelSeq with 1,000 keys.
func BenchmarkDelSeq1e3(b *testing.B) { benchmarkDelSeq(b, 1000) }

// BenchmarkDelSeq1e4 runs benchmarkDelSeq with 10,000 keys.
func BenchmarkDelSeq1e4(b *testing.B) { benchmarkDelSeq(b, 10000) }

// BenchmarkDelSeq1e5 runs benchmarkDelSeq with 100,000 keys.
func BenchmarkDelSeq1e5(b *testing.B) { benchmarkDelSeq(b, 100000) }

// BenchmarkDelSeq1e6 runs benchmarkDelSeq with 1,000,000 keys.
func BenchmarkDelSeq1e6(b *testing.B) { benchmarkDelSeq(b, 1000000) }
// benchmarkDelSeq times deleting the keys 0..n-1, in ascending order, from a
// tree that holds exactly those keys. Each run builds a fresh tree while the
// timer is stopped, so only the Delete calls are timed.
//
// The timer is stopped and the tree is closed at the end of every run, the
// same way benchmarkDelRnd does it, so the tree is released rather than
// dropped.
func benchmarkDelSeq(b *testing.B, n int) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Untimed setup: build the tree that is about to be emptied.
		b.StopTimer()
		r := TreeNew(cmp)
		for j := int64(0); j < int64(n); j++ {
			r.Set(j, struct{}{})
		}
		debug.FreeOSMemory()
		b.StartTimer()
		for j := int64(0); j < int64(n); j++ {
			r.Delete(j)
		}
		// Untimed teardown.
		b.StopTimer()
		r.Close()
	}
	b.StopTimer()
}
// BenchmarkDelRnd1e3 runs benchmarkDelRnd with 1,000 keys.
func BenchmarkDelRnd1e3(b *testing.B) { benchmarkDelRnd(b, 1000) }

// BenchmarkDelRnd1e4 runs benchmarkDelRnd with 10,000 keys.
func BenchmarkDelRnd1e4(b *testing.B) { benchmarkDelRnd(b, 10000) }

// BenchmarkDelRnd1e5 runs benchmarkDelRnd with 100,000 keys.
func BenchmarkDelRnd1e5(b *testing.B) { benchmarkDelRnd(b, 100000) }

// BenchmarkDelRnd1e6 runs benchmarkDelRnd with 1,000,000 keys.
func BenchmarkDelRnd1e6(b *testing.B) { benchmarkDelRnd(b, 1000000) }
// benchmarkDelRnd times deleting n keys drawn from rng() from a tree that was
// filled with those same keys. The key sequence is generated once up front.
// Each run rebuilds the tree with the timer stopped, times the Delete calls in
// generation order, and closes the tree with the timer stopped again.
func benchmarkDelRnd(b *testing.B, n int) {
	gen := rng()
	keys := make([]int64, n)
	for i := 0; i < len(keys); i++ {
		keys[i] = int64(gen.Next())
	}

	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		// Untimed setup: load every key into a new tree.
		b.StopTimer()
		tree := TreeNew(cmp)
		for _, key := range keys {
			tree.Set(key, struct{}{})
		}
		debug.FreeOSMemory()

		// Timed section: remove every key again.
		b.StartTimer()
		for _, key := range keys {
			tree.Delete(key)
		}
		b.StopTimer()

		// Untimed teardown.
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkSeekSeq1e3 runs benchmarkSeekSeq with 1,000 keys.
func BenchmarkSeekSeq1e3(b *testing.B) { benchmarkSeekSeq(b, 1000) }

// BenchmarkSeekSeq1e4 runs benchmarkSeekSeq with 10,000 keys.
func BenchmarkSeekSeq1e4(b *testing.B) { benchmarkSeekSeq(b, 10000) }

// BenchmarkSeekSeq1e5 runs benchmarkSeekSeq with 100,000 keys.
func BenchmarkSeekSeq1e5(b *testing.B) { benchmarkSeekSeq(b, 100000) }

// BenchmarkSeekSeq1e6 runs benchmarkSeekSeq with 1,000,000 keys.
func BenchmarkSeekSeq1e6(b *testing.B) { benchmarkSeekSeq(b, 1000000) }
// benchmarkSeekSeq times seeking to each of the keys 0..n-1, in ascending
// order, in a tree holding exactly those keys. Every value returned by Seek is
// closed immediately, inside the timed region. The tree is rebuilt for each
// run with the timer stopped and closed afterwards, also with the timer
// stopped.
func benchmarkSeekSeq(b *testing.B, n int) {
	limit := int64(n)
	for run := 0; run < b.N; run++ {
		// Untimed setup: build the tree for this run.
		b.StopTimer()
		tree := TreeNew(cmp)
		for key := int64(0); key < limit; key++ {
			tree.Set(key, struct{}{})
		}
		debug.FreeOSMemory()

		// Timed section: one Seek plus Close per key.
		b.StartTimer()
		for key := int64(0); key < limit; key++ {
			enum, _ := tree.Seek(key)
			enum.Close()
		}
		b.StopTimer()

		// Untimed teardown.
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkSeekRnd1e3 runs benchmarkSeekRnd with 1,000 keys.
func BenchmarkSeekRnd1e3(b *testing.B) { benchmarkSeekRnd(b, 1000) }

// BenchmarkSeekRnd1e4 runs benchmarkSeekRnd with 10,000 keys.
func BenchmarkSeekRnd1e4(b *testing.B) { benchmarkSeekRnd(b, 10000) }

// BenchmarkSeekRnd1e5 runs benchmarkSeekRnd with 100,000 keys.
func BenchmarkSeekRnd1e5(b *testing.B) { benchmarkSeekRnd(b, 100000) }

// BenchmarkSeekRnd1e6 runs benchmarkSeekRnd with 1,000,000 keys.
func BenchmarkSeekRnd1e6(b *testing.B) { benchmarkSeekRnd(b, 1000000) }
// benchmarkSeekRnd times seeking to n keys drawn from rng() in a tree that was
// filled with those same keys before the timer was reset. Seeks are issued in
// generation order and every value returned by Seek is closed immediately,
// inside the timed region. The tree is shared by all b.N runs and closed after
// the timer stops.
func benchmarkSeekRnd(b *testing.B, n int) {
	tree := TreeNew(cmp)
	gen := rng()

	// Untimed setup: generate the keys, then load them into the tree.
	keys := make([]int64, n)
	for i := 0; i < len(keys); i++ {
		keys[i] = int64(gen.Next())
	}
	for i := 0; i < len(keys); i++ {
		tree.Set(keys[i], struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		for _, key := range keys {
			enum, _ := tree.Seek(key)
			enum.Close()
		}
	}
	b.StopTimer()

	tree.Close()
}
// BenchmarkNext1e3 runs benchmarkNext with 1,000 keys.
func BenchmarkNext1e3(b *testing.B) { benchmarkNext(b, 1000) }

// BenchmarkNext1e4 runs benchmarkNext with 10,000 keys.
func BenchmarkNext1e4(b *testing.B) { benchmarkNext(b, 10000) }

// BenchmarkNext1e5 runs benchmarkNext with 100,000 keys.
func BenchmarkNext1e5(b *testing.B) { benchmarkNext(b, 100000) }

// BenchmarkNext1e6 runs benchmarkNext with 1,000,000 keys.
func BenchmarkNext1e6(b *testing.B) { benchmarkNext(b, 1000000) }
// benchmarkNext times a full walk over a tree holding the keys 0..n-1,
// starting from SeekFirst and calling Next until it reports an error
// (presumably the end of the enumeration; the error value is not inspected).
// The benchmark fails if SeekFirst errors or if the number of successful Next
// calls differs from n. The tree is built once before the timer is reset and
// closed after the timer stops.
func benchmarkNext(b *testing.B, n int) {
	tree := TreeNew(cmp)
	for key, limit := int64(0), int64(n); key < limit; key++ {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		en, err := tree.SeekFirst()
		if err != nil {
			b.Fatal(err)
		}

		// Count the items visited before Next first fails.
		visited := 0
		for _, _, err = en.Next(); err == nil; _, _, err = en.Next() {
			visited++
		}
		if visited != n {
			b.Fatal(visited)
		}
	}
	b.StopTimer()

	tree.Close()
}
// BenchmarkPrev1e3 runs benchmarkPrev with 1,000 keys.
func BenchmarkPrev1e3(b *testing.B) { benchmarkPrev(b, 1000) }

// BenchmarkPrev1e4 runs benchmarkPrev with 10,000 keys.
func BenchmarkPrev1e4(b *testing.B) { benchmarkPrev(b, 10000) }

// BenchmarkPrev1e5 runs benchmarkPrev with 100,000 keys.
func BenchmarkPrev1e5(b *testing.B) { benchmarkPrev(b, 100000) }

// BenchmarkPrev1e6 runs benchmarkPrev with 1,000,000 keys.
func BenchmarkPrev1e6(b *testing.B) { benchmarkPrev(b, 1000000) }
// benchmarkPrev times a full reverse walk over a tree holding the keys
// 0..n-1, starting from SeekLast and calling Prev until it reports an error
// (presumably the start of the enumeration; the error value is not
// inspected). The benchmark fails if SeekLast errors or if the number of
// successful Prev calls differs from n.
//
// The tree is built once before the timer is reset. After the last run the
// timer is stopped and the tree is closed, the same way benchmarkNext does it,
// so the tree is released rather than dropped.
func benchmarkPrev(b *testing.B, n int) {
	t := TreeNew(cmp)
	for i := int64(0); i < int64(n); i++ {
		t.Set(i, struct{}{})
	}
	debug.FreeOSMemory()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		en, err := t.SeekLast()
		if err != nil {
			b.Fatal(err)
		}
		// Count the items visited before Prev first fails.
		m := 0
		for {
			if _, _, err = en.Prev(); err != nil {
				break
			}
			m++
		}
		if m != n {
			b.Fatal(m)
		}
	}
	// Untimed teardown.
	b.StopTimer()
	t.Close()
}

View file

@ -19,46 +19,33 @@ import (
"math" "math"
"strings" "strings"
"github.com/petar/GoLLRB/llrb"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
) )
type Iterator struct { type Iterator struct {
uid uint64 uid uint64
tags graph.Tagger tags graph.Tagger
tree *llrb.LLRB tree *b.Tree
iter *b.Enumerator
data string data string
isRunning bool
iterLast Int64
result graph.Value result graph.Value
} }
type Int64 int64 func cmp(a, b int64) int {
return int(a - b)
func (i Int64) Less(than llrb.Item) bool {
return i < than.(Int64)
} }
func IterateOne(tree *llrb.LLRB, last Int64) Int64 { func NewIterator(tree *b.Tree, data string) *Iterator {
var next Int64 iter, err := tree.SeekFirst()
tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool { if err != nil {
if i.(Int64) == last { iter = nil
return true
} else {
next = i.(Int64)
return false
} }
})
return next
}
func NewLlrbIterator(tree *llrb.LLRB, data string) *Iterator {
return &Iterator{ return &Iterator{
uid: iterator.NextUID(), uid: iterator.NextUID(),
tree: tree, tree: tree,
iterLast: Int64(-1), iter: iter,
data: data, data: data,
} }
} }
@ -68,7 +55,11 @@ func (it *Iterator) UID() uint64 {
} }
func (it *Iterator) Reset() { func (it *Iterator) Reset() {
it.iterLast = Int64(-1) var err error
it.iter, err = it.tree.SeekFirst()
if err != nil {
it.iter = nil
}
} }
func (it *Iterator) Tagger() *graph.Tagger { func (it *Iterator) Tagger() *graph.Tagger {
@ -86,20 +77,51 @@ func (it *Iterator) TagResults(dst map[string]graph.Value) {
} }
func (it *Iterator) Clone() graph.Iterator { func (it *Iterator) Clone() graph.Iterator {
m := NewLlrbIterator(it.tree, it.data) var iter *b.Enumerator
if it.result != nil {
var ok bool
iter, ok = it.tree.Seek(it.result.(int64))
if !ok {
panic("value unexpectedly missing")
}
} else {
var err error
iter, err = it.tree.SeekFirst()
if err != nil {
iter = nil
}
}
m := &Iterator{
uid: iterator.NextUID(),
tree: it.tree,
iter: iter,
data: it.data,
}
m.tags.CopyFrom(it) m.tags.CopyFrom(it)
return m return m
} }
func (it *Iterator) Close() {} func (it *Iterator) Close() {
if it.iter != nil {
it.iter.Close()
it.iter = nil
}
}
func (it *Iterator) Next() bool { func (it *Iterator) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
if it.tree.Max() == nil || it.result == int64(it.tree.Max().(Int64)) {
if it.iter == nil {
return graph.NextLogOut(it, nil, false) return graph.NextLogOut(it, nil, false)
} }
it.iterLast = IterateOne(it.tree, it.iterLast) result, _, err := it.iter.Next()
it.result = int64(it.iterLast) if err != nil {
return graph.NextLogOut(it, nil, false)
}
it.result = result
return graph.NextLogOut(it, it.result, true) return graph.NextLogOut(it, it.result, true)
} }
@ -126,7 +148,7 @@ func (it *Iterator) Size() (int64, bool) {
func (it *Iterator) Contains(v graph.Value) bool { func (it *Iterator) Contains(v graph.Value) bool {
graph.ContainsLogIn(it, v) graph.ContainsLogIn(it, v)
if it.tree.Has(Int64(v.(int64))) { if _, ok := it.tree.Get(v.(int64)); ok {
it.result = v it.result = v
return graph.ContainsLogOut(it, v, true) return graph.ContainsLogOut(it, v, true)
} }
@ -141,7 +163,7 @@ func (it *Iterator) DebugString(indent int) string {
var memType graph.Type var memType graph.Type
func init() { func init() {
memType = graph.RegisterIterator("llrb") memType = graph.RegisterIterator("b+tree")
} }
func Type() graph.Type { return memType } func Type() graph.Type { return memType }

View file

@ -18,11 +18,11 @@ import (
"fmt" "fmt"
"github.com/barakmich/glog" "github.com/barakmich/glog"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
"github.com/petar/GoLLRB/llrb"
) )
func init() { func init() {
@ -32,47 +32,36 @@ func init() {
} }
type TripleDirectionIndex struct { type TripleDirectionIndex struct {
subject map[int64]*llrb.LLRB index [4]map[int64]*b.Tree
predicate map[int64]*llrb.LLRB
object map[int64]*llrb.LLRB
label map[int64]*llrb.LLRB
} }
func NewTripleDirectionIndex() *TripleDirectionIndex { func NewTripleDirectionIndex() TripleDirectionIndex {
var tdi TripleDirectionIndex return TripleDirectionIndex{[...]map[int64]*b.Tree{
tdi.subject = make(map[int64]*llrb.LLRB) quad.Subject - 1: make(map[int64]*b.Tree),
tdi.predicate = make(map[int64]*llrb.LLRB) quad.Predicate - 1: make(map[int64]*b.Tree),
tdi.object = make(map[int64]*llrb.LLRB) quad.Object - 1: make(map[int64]*b.Tree),
tdi.label = make(map[int64]*llrb.LLRB) quad.Label - 1: make(map[int64]*b.Tree),
return &tdi }}
} }
func (tdi *TripleDirectionIndex) GetForDir(d quad.Direction) map[int64]*llrb.LLRB { func (tdi TripleDirectionIndex) Tree(d quad.Direction, id int64) *b.Tree {
switch d { if d < quad.Subject || d > quad.Label {
case quad.Subject:
return tdi.subject
case quad.Object:
return tdi.object
case quad.Predicate:
return tdi.predicate
case quad.Label:
return tdi.label
}
panic("illegal direction") panic("illegal direction")
}
func (tdi *TripleDirectionIndex) GetOrCreate(d quad.Direction, id int64) *llrb.LLRB {
directionIndex := tdi.GetForDir(d)
if _, ok := directionIndex[id]; !ok {
directionIndex[id] = llrb.New()
} }
return directionIndex[id] tree, ok := tdi.index[d-1][id]
if !ok {
tree = b.TreeNew(cmp)
tdi.index[d-1][id] = tree
}
return tree
} }
func (tdi *TripleDirectionIndex) Get(d quad.Direction, id int64) (*llrb.LLRB, bool) { func (tdi TripleDirectionIndex) Get(d quad.Direction, id int64) (*b.Tree, bool) {
directionIndex := tdi.GetForDir(d) if d < quad.Subject || d > quad.Label {
tree, exists := directionIndex[id] panic("illegal direction")
return tree, exists }
tree, ok := tdi.index[d-1][id]
return tree, ok
} }
type TripleStore struct { type TripleStore struct {
@ -83,22 +72,22 @@ type TripleStore struct {
triples []quad.Quad triples []quad.Quad
size int64 size int64
index TripleDirectionIndex index TripleDirectionIndex
// vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree // vip_index map[string]map[int64]map[string]map[int64]*b.Tree
} }
func newTripleStore() *TripleStore { func newTripleStore() *TripleStore {
var ts TripleStore return &TripleStore{
ts.idMap = make(map[string]int64) idMap: make(map[string]int64),
ts.revIdMap = make(map[int64]string) revIdMap: make(map[int64]string),
ts.triples = make([]quad.Quad, 1, 200)
// Sentinel null triple so triple indices start at 1 // Sentinel null triple so triple indices start at 1
ts.triples[0] = quad.Quad{} triples: make([]quad.Quad, 1, 200),
ts.size = 1
ts.index = *NewTripleDirectionIndex() size: 1,
ts.idCounter = 1 index: NewTripleDirectionIndex(),
ts.tripleIdCounter = 1 idCounter: 1,
return &ts tripleIdCounter: 1,
}
} }
func (ts *TripleStore) AddTripleSet(triples []quad.Quad) { func (ts *TripleStore) AddTripleSet(triples []quad.Quad) {
@ -107,47 +96,47 @@ func (ts *TripleStore) AddTripleSet(triples []quad.Quad) {
} }
} }
func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) { const maxInt = int(^uint(0) >> 1)
smallest := -1
var smallest_tree *llrb.LLRB func (ts *TripleStore) indexOf(t quad.Quad) (int64, bool) {
min := maxInt
var tree *b.Tree
for d := quad.Subject; d <= quad.Label; d++ { for d := quad.Subject; d <= quad.Label; d++ {
sid := t.Get(d) sid := t.Get(d)
if d == quad.Label && sid == "" { if d == quad.Label && sid == "" {
continue continue
} }
id, ok := ts.idMap[sid] id, ok := ts.idMap[sid]
// If we've never heard about a node, it most not exist // If we've never heard about a node, it must not exist
if !ok { if !ok {
return false, 0 return 0, false
} }
index, exists := ts.index.Get(d, id) index, ok := ts.index.Get(d, id)
if !exists { if !ok {
// If it's never been indexed in this direction, it can't exist. // If it's never been indexed in this direction, it can't exist.
return false, 0 return 0, false
} }
if smallest == -1 || index.Len() < smallest { if l := index.Len(); l < min {
smallest = index.Len() min, tree = l, index
smallest_tree = index
} }
} }
it := NewLlrbIterator(smallest_tree, "") it := NewIterator(tree, "")
for it.Next() { for it.Next() {
val := it.Result() val := it.Result()
if t == ts.triples[val.(int64)] { if t == ts.triples[val.(int64)] {
return true, val.(int64) return val.(int64), true
} }
} }
return false, 0 return 0, false
} }
func (ts *TripleStore) AddTriple(t quad.Quad) { func (ts *TripleStore) AddTriple(t quad.Quad) {
if exists, _ := ts.tripleExists(t); exists { if _, exists := ts.indexOf(t); exists {
return return
} }
var tripleID int64
ts.triples = append(ts.triples, t) ts.triples = append(ts.triples, t)
tripleID = ts.tripleIdCounter tid := ts.tripleIdCounter
ts.size++ ts.size++
ts.tripleIdCounter++ ts.tripleIdCounter++
@ -168,22 +157,20 @@ func (ts *TripleStore) AddTriple(t quad.Quad) {
continue continue
} }
id := ts.idMap[t.Get(d)] id := ts.idMap[t.Get(d)]
tree := ts.index.GetOrCreate(d, id) tree := ts.index.Tree(d, id)
tree.ReplaceOrInsert(Int64(tripleID)) tree.Set(tid, struct{}{})
} }
// TODO(barakmich): Add VIP indexing // TODO(barakmich): Add VIP indexing
} }
func (ts *TripleStore) RemoveTriple(t quad.Quad) { func (ts *TripleStore) RemoveTriple(t quad.Quad) {
var tripleID int64 tid, ok := ts.indexOf(t)
var exists bool if !ok {
tripleID = 0
if exists, tripleID = ts.tripleExists(t); !exists {
return return
} }
ts.triples[tripleID] = quad.Quad{} ts.triples[tid] = quad.Quad{}
ts.size-- ts.size--
for d := quad.Subject; d <= quad.Label; d++ { for d := quad.Subject; d <= quad.Label; d++ {
@ -191,8 +178,8 @@ func (ts *TripleStore) RemoveTriple(t quad.Quad) {
continue continue
} }
id := ts.idMap[t.Get(d)] id := ts.idMap[t.Get(d)]
tree := ts.index.GetOrCreate(d, id) tree := ts.index.Tree(d, id)
tree.Delete(Int64(tripleID)) tree.Delete(tid)
} }
for d := quad.Subject; d <= quad.Label; d++ { for d := quad.Subject; d <= quad.Label; d++ {
@ -208,7 +195,7 @@ func (ts *TripleStore) RemoveTriple(t quad.Quad) {
if d == quad.Label && t.Get(d) == "" { if d == quad.Label && t.Get(d) == "" {
continue continue
} }
nodeTree := ts.index.GetOrCreate(d, id) nodeTree := ts.index.Tree(d, id)
if nodeTree.Len() != 0 { if nodeTree.Len() != 0 {
stillExists = true stillExists = true
break break
@ -229,7 +216,7 @@ func (ts *TripleStore) TripleIterator(d quad.Direction, value graph.Value) graph
index, ok := ts.index.Get(d, value.(int64)) index, ok := ts.index.Get(d, value.(int64))
data := fmt.Sprintf("dir:%s val:%d", d, value.(int64)) data := fmt.Sprintf("dir:%s val:%d", d, value.(int64))
if ok { if ok {
return NewLlrbIterator(index, data) return NewIterator(index, data)
} }
return &iterator.Null{} return &iterator.Null{}
} }
@ -271,4 +258,5 @@ func (ts *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.
func (ts *TripleStore) NodesAllIterator() graph.Iterator { func (ts *TripleStore) NodesAllIterator() graph.Iterator {
return NewMemstoreAllIterator(ts) return NewMemstoreAllIterator(ts)
} }
func (ts *TripleStore) Close() {} func (ts *TripleStore) Close() {}