Merge pull request #113 from barakmich/log_database

Convert Cayley indexing to an append-only log
This commit is contained in:
Barak Michener 2014-08-16 03:26:49 -04:00
commit e1e95b9686
27 changed files with 2326 additions and 635 deletions

View file

@ -8,6 +8,7 @@ go:
install: install:
- go get github.com/badgerodon/peg - go get github.com/badgerodon/peg
- go get github.com/barakmich/glog - go get github.com/barakmich/glog
- go get github.com/cznic/mathutil
- go get github.com/julienschmidt/httprouter - go get github.com/julienschmidt/httprouter
- go get github.com/petar/GoLLRB/llrb - go get github.com/petar/GoLLRB/llrb
- go get github.com/peterh/liner - go get github.com/peterh/liner

View file

@ -44,6 +44,9 @@ import (
_ "github.com/google/cayley/graph/leveldb" _ "github.com/google/cayley/graph/leveldb"
_ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/graph/mongo" _ "github.com/google/cayley/graph/mongo"
// Load writer registry
_ "github.com/google/cayley/writer"
) )
var ( var (
@ -105,7 +108,7 @@ func main() {
} }
var ( var (
ts graph.TripleStore handle *graph.Handle
err error err error
) )
switch cmd { switch cmd {
@ -123,60 +126,60 @@ func main() {
break break
} }
if *tripleFile != "" { if *tripleFile != "" {
ts, err = db.Open(cfg) handle, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
err = load(ts, cfg, *tripleFile, *tripleType) err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType)
if err != nil { if err != nil {
break break
} }
ts.Close() handle.Close()
} }
case "load": case "load":
ts, err = db.Open(cfg) handle, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
err = load(ts, cfg, *tripleFile, *tripleType) err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType)
if err != nil { if err != nil {
break break
} }
ts.Close() handle.Close()
case "repl": case "repl":
ts, err = db.Open(cfg) handle, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
if !graph.IsPersistent(cfg.DatabaseType) { if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", *tripleType) err = load(handle.QuadWriter, cfg, "", *tripleType)
if err != nil { if err != nil {
break break
} }
} }
err = db.Repl(ts, *queryLanguage, cfg) err = db.Repl(handle, *queryLanguage, cfg)
ts.Close() handle.Close()
case "http": case "http":
ts, err = db.Open(cfg) handle, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
if !graph.IsPersistent(cfg.DatabaseType) { if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", *tripleType) err = load(handle.QuadWriter, cfg, "", *tripleType)
if err != nil { if err != nil {
break break
} }
} }
http.Serve(ts, cfg) http.Serve(handle, cfg)
ts.Close() handle.Close()
default: default:
fmt.Println("No command", cmd) fmt.Println("No command", cmd)
@ -187,7 +190,29 @@ func main() {
} }
} }
func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { func load(qw graph.QuadWriter, cfg *config.Config, path, typ string) error {
return decompressAndLoad(qw, cfg, path, typ, db.Load)
}
// removeAll forwards qw, cfg, path and typ to decompressAndLoad, passing
// remove as the callback argument, and returns whatever error that call
// yields.
//
// NOTE(review): the visible tail of decompressAndLoad ends with
// `return db.Load(qw, cfg, dec)` and does not appear to invoke its loadFn
// parameter. If so, this function loads quads instead of removing them --
// confirm that the callback is actually used.
func removeAll(qw graph.QuadWriter, cfg *config.Config, path, typ string) error {
return decompressAndLoad(qw, cfg, path, typ, remove)
}
// remove reads quads from dec one at a time and asks qw to remove each of
// them. It returns nil once dec reports io.EOF and returns any other decode
// error immediately, leaving the remaining input unread. cfg is accepted so
// the signature matches the loader callback type but is not consulted.
//
// NOTE(review): whatever RemoveQuad returns is discarded here; if it
// reports an error, consider propagating it -- confirm against the
// graph.QuadWriter interface.
func remove(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error {
	for {
		q, err := dec.Unmarshal()
		switch err {
		case nil:
			qw.RemoveQuad(q)
		case io.EOF:
			// End of input: every decoded quad has been handed to qw.
			return nil
		default:
			return err
		}
	}
}
func decompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string, loadFn func(graph.QuadWriter, *config.Config, quad.Unmarshaler) error) error {
var r io.Reader var r io.Reader
if path == "" { if path == "" {
@ -233,7 +258,7 @@ func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error {
return fmt.Errorf("unknown quad format %q", typ) return fmt.Errorf("unknown quad format %q", typ)
} }
return db.Load(ts, cfg, dec) return db.Load(qw, cfg, dec)
} }
const ( const (

View file

@ -297,26 +297,45 @@ var m2_actors = movie2.Save("name","movie2").Follow(filmToActor)
` `
var ( var (
once sync.Once create sync.Once
deleteAndRecreate sync.Once
cfg = &config.Config{ cfg = &config.Config{
DatabasePath: "30kmoviedata.nq.gz", DatabasePath: "30kmoviedata.nq.gz",
DatabaseType: "memstore", DatabaseType: "memstore",
ReplicationType: "single",
Timeout: 300 * time.Second, Timeout: 300 * time.Second,
} }
ts graph.TripleStore handle *graph.Handle
) )
func prepare(t testing.TB) { func prepare(t testing.TB) {
var err error var err error
once.Do(func() { create.Do(func() {
ts, err = db.Open(cfg) handle, err = db.Open(cfg)
if err != nil { if err != nil {
t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err)
} }
if !graph.IsPersistent(cfg.DatabaseType) { if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", "cquad") err = load(handle.QuadWriter, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
}
}
})
}
func deletePrepare(t testing.TB) {
var err error
deleteAndRecreate.Do(func() {
prepare(t)
if !graph.IsPersistent(cfg.DatabaseType) {
err = removeAll(handle.QuadWriter, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to remove %q: %v", cfg.DatabasePath, err)
}
err = load(handle.QuadWriter, cfg, "", "cquad")
if err != nil { if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
} }
@ -326,11 +345,23 @@ func prepare(t testing.TB) {
func TestQueries(t *testing.T) { func TestQueries(t *testing.T) {
prepare(t) prepare(t)
checkQueries(t)
}
// TestDeletedAndRecreatedQueries runs the same query checks as TestQueries,
// but only after deletePrepare has set up the shared store. The test is
// skipped entirely when the -short flag is set.
func TestDeletedAndRecreatedQueries(t *testing.T) {
if testing.Short() {
t.Skip()
}
// deletePrepare does its work at most once per process (sync.Once).
deletePrepare(t)
checkQueries(t)
}
func checkQueries(t *testing.T) {
for _, test := range benchmarkQueries { for _, test := range benchmarkQueries {
if testing.Short() && test.long { if testing.Short() && test.long {
continue continue
} }
ses := gremlin.NewSession(ts, cfg.Timeout, true) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true)
_, err := ses.InputParses(test.query) _, err := ses.InputParses(test.query)
if err != nil { if err != nil {
t.Fatalf("Failed to parse benchmark gremlin %s: %v", test.message, err) t.Fatalf("Failed to parse benchmark gremlin %s: %v", test.message, err)
@ -382,7 +413,7 @@ func runBench(n int, b *testing.B) {
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
c := make(chan interface{}, 5) c := make(chan interface{}, 5)
ses := gremlin.NewSession(ts, cfg.Timeout, true) ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true)
// Do the parsing we know works. // Do the parsing we know works.
ses.InputParses(benchmarkQueries[n].query) ses.InputParses(benchmarkQueries[n].query)
b.StartTimer() b.StartTimer()

View file

@ -29,6 +29,8 @@ type Config struct {
DatabaseType string DatabaseType string
DatabasePath string DatabasePath string
DatabaseOptions map[string]interface{} DatabaseOptions map[string]interface{}
ReplicationType string
ReplicationOptions map[string]interface{}
ListenHost string ListenHost string
ListenPort string ListenPort string
ReadOnly bool ReadOnly bool
@ -40,6 +42,8 @@ type config struct {
DatabaseType string `json:"database"` DatabaseType string `json:"database"`
DatabasePath string `json:"db_path"` DatabasePath string `json:"db_path"`
DatabaseOptions map[string]interface{} `json:"db_options"` DatabaseOptions map[string]interface{} `json:"db_options"`
ReplicationType string `json:"replication"`
ReplicationOptions map[string]interface{} `json:"replication_options"`
ListenHost string `json:"listen_host"` ListenHost string `json:"listen_host"`
ListenPort string `json:"listen_port"` ListenPort string `json:"listen_port"`
ReadOnly bool `json:"read_only"` ReadOnly bool `json:"read_only"`
@ -57,6 +61,8 @@ func (c *Config) UnmarshalJSON(data []byte) error {
DatabaseType: t.DatabaseType, DatabaseType: t.DatabaseType,
DatabasePath: t.DatabasePath, DatabasePath: t.DatabasePath,
DatabaseOptions: t.DatabaseOptions, DatabaseOptions: t.DatabaseOptions,
ReplicationType: t.ReplicationType,
ReplicationOptions: t.ReplicationOptions,
ListenHost: t.ListenHost, ListenHost: t.ListenHost,
ListenPort: t.ListenPort, ListenPort: t.ListenPort,
ReadOnly: t.ReadOnly, ReadOnly: t.ReadOnly,
@ -71,6 +77,8 @@ func (c *Config) MarshalJSON() ([]byte, error) {
DatabaseType: c.DatabaseType, DatabaseType: c.DatabaseType,
DatabasePath: c.DatabasePath, DatabasePath: c.DatabasePath,
DatabaseOptions: c.DatabaseOptions, DatabaseOptions: c.DatabaseOptions,
ReplicationType: c.ReplicationType,
ReplicationOptions: c.ReplicationOptions,
ListenHost: c.ListenHost, ListenHost: c.ListenHost,
ListenPort: c.ListenPort, ListenPort: c.ListenPort,
ReadOnly: c.ReadOnly, ReadOnly: c.ReadOnly,
@ -117,6 +125,7 @@ func (d *duration) MarshalJSON() ([]byte, error) {
var ( var (
databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
databaseBackend = flag.String("db", "memstore", "Database Backend.") databaseBackend = flag.String("db", "memstore", "Database Backend.")
replicationBackend = flag.String("replication", "single", "Replication method.")
host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).") host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).")
loadSize = flag.Int("load_size", 10000, "Size of triplesets to load") loadSize = flag.Int("load_size", 10000, "Size of triplesets to load")
port = flag.String("port", "64210", "Port to listen on.") port = flag.String("port", "64210", "Port to listen on.")
@ -175,6 +184,10 @@ func ParseConfigFromFlagsAndFile(fileFlag string) *Config {
config.DatabaseType = *databaseBackend config.DatabaseType = *databaseBackend
} }
if config.ReplicationType == "" {
config.ReplicationType = *replicationBackend
}
if config.ListenHost == "" { if config.ListenHost == "" {
config.ListenHost = *host config.ListenHost = *host
} }

View file

@ -36,8 +36,20 @@ func Init(cfg *config.Config) error {
return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
} }
func Open(cfg *config.Config) (graph.TripleStore, error) { func Open(cfg *config.Config) (*graph.Handle, error) {
glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) qs, err := OpenQuadStore(cfg)
if err != nil {
return nil, err
}
qw, err := OpenQuadWriter(qs, cfg)
if err != nil {
return nil, err
}
return &graph.Handle{QuadStore: qs, QuadWriter: qw}, nil
}
func OpenQuadStore(cfg *config.Config) (graph.TripleStore, error) {
glog.Infof("Opening quad store %q at %s", cfg.DatabaseType, cfg.DatabasePath)
ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
if err != nil { if err != nil {
return nil, err return nil, err
@ -46,19 +58,17 @@ func Open(cfg *config.Config) (graph.TripleStore, error) {
return ts, nil return ts, nil
} }
func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { func OpenQuadWriter(qs graph.TripleStore, cfg *config.Config) (graph.QuadWriter, error) {
bulker, canBulk := ts.(graph.BulkLoader) glog.Infof("Opening replication method %q", cfg.ReplicationType)
if canBulk { w, err := graph.NewQuadWriter(cfg.ReplicationType, qs, cfg.ReplicationOptions)
switch err := bulker.BulkLoad(dec); err { if err != nil {
case nil: return nil, err
return nil
case graph.ErrCannotBulkLoad:
// Try individual loading.
default:
return err
}
} }
return w, nil
}
func Load(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error {
block := make([]quad.Quad, 0, cfg.LoadSize) block := make([]quad.Quad, 0, cfg.LoadSize)
for { for {
t, err := dec.Unmarshal() t, err := dec.Unmarshal()
@ -70,11 +80,11 @@ func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error
} }
block = append(block, t) block = append(block, t)
if len(block) == cap(block) { if len(block) == cap(block) {
ts.AddTripleSet(block) qw.AddQuadSet(block)
block = block[:0] block = block[:0]
} }
} }
ts.AddTripleSet(block) qw.AddQuadSet(block)
return nil return nil
} }

View file

@ -70,17 +70,17 @@ const (
history = ".cayley_history" history = ".cayley_history"
) )
func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error { func Repl(h *graph.Handle, queryLanguage string, cfg *config.Config) error {
var ses query.Session var ses query.Session
switch queryLanguage { switch queryLanguage {
case "sexp": case "sexp":
ses = sexp.NewSession(ts) ses = sexp.NewSession(h.QuadStore)
case "mql": case "mql":
ses = mql.NewSession(ts) ses = mql.NewSession(h.QuadStore)
case "gremlin": case "gremlin":
fallthrough fallthrough
default: default:
ses = gremlin.NewSession(ts, cfg.Timeout, true) ses = gremlin.NewSession(h.QuadStore, cfg.Timeout, true)
} }
term, err := terminal(history) term, err := terminal(history)
@ -124,25 +124,25 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
continue continue
case strings.HasPrefix(line, ":a"): case strings.HasPrefix(line, ":a"):
triple, err := cquads.Parse(line[3:]) quad, err := cquads.Parse(line[3:])
if !triple.IsValid() { if !quad.IsValid() {
if err != nil { if err != nil {
fmt.Printf("not a valid triple: %v\n", err) fmt.Printf("not a valid quad: %v\n", err)
} }
continue continue
} }
ts.AddTriple(triple) h.QuadWriter.AddQuad(quad)
continue continue
case strings.HasPrefix(line, ":d"): case strings.HasPrefix(line, ":d"):
triple, err := cquads.Parse(line[3:]) quad, err := cquads.Parse(line[3:])
if !triple.IsValid() { if !quad.IsValid() {
if err != nil { if err != nil {
fmt.Printf("not a valid triple: %v\n", err) fmt.Printf("not a valid quad: %v\n", err)
} }
continue continue
} }
ts.RemoveTriple(triple) h.QuadWriter.RemoveQuad(quad)
continue continue
} }
} }

View file

@ -36,9 +36,7 @@ func (qs *store) ValueOf(s string) graph.Value {
return nil return nil
} }
func (qs *store) AddTriple(quad.Quad) {} func (qs *store) ApplyDeltas([]graph.Delta) error { return nil }
func (qs *store) AddTripleSet([]quad.Quad) {}
func (qs *store) Quad(graph.Value) quad.Quad { return quad.Quad{} } func (qs *store) Quad(graph.Value) quad.Quad { return quad.Quad{} }
@ -60,6 +58,8 @@ func (qs *store) NameOf(v graph.Value) string {
func (qs *store) Size() int64 { return 0 } func (qs *store) Size() int64 { return 0 }
// Horizon always reports 0 for this store.
func (qs *store) Horizon() int64 { return 0 }
func (qs *store) DebugPrint() {} func (qs *store) DebugPrint() {}
func (qs *store) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { func (qs *store) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {

View file

@ -16,9 +16,11 @@ package leveldb
import ( import (
"bytes" "bytes"
"encoding/json"
"fmt" "fmt"
"strings" "strings"
"github.com/barakmich/glog"
ldbit "github.com/syndtr/goleveldb/leveldb/iterator" ldbit "github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt" "github.com/syndtr/goleveldb/leveldb/opt"
@ -65,10 +67,9 @@ func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleS
ok := it.iter.Seek(it.nextPrefix) ok := it.iter.Seek(it.nextPrefix)
if !ok { if !ok {
// FIXME(kortschak) What are the semantics here? Is this iterator usable?
// If not, we should return nil *Iterator and an error.
it.open = false it.open = false
it.iter.Release() it.iter.Release()
glog.Error("Opening LevelDB iterator couldn't seek to location ", it.nextPrefix)
} }
return &it return &it
@ -117,6 +118,12 @@ func (it *Iterator) Close() {
} }
} }
// isLiveValue reports whether the JSON-encoded IndexEntry in val describes
// a quad that currently exists. An entry is treated as live when its
// History holds an odd number of IDs; buildQuadWrite appends one ID per
// applied delta, so an odd count presumably means the latest delta was an
// add rather than a delete.
//
// A value that cannot be decoded is reported as not live. Previously the
// decode error was ignored, so an entry that had only been partially
// populated before the decoder failed could be classified as live.
func (it *Iterator) isLiveValue(val []byte) bool {
	var entry IndexEntry
	if err := json.Unmarshal(val, &entry); err != nil {
		return false
	}
	return len(entry.History)%2 != 0
}
func (it *Iterator) Next() bool { func (it *Iterator) Next() bool {
if it.iter == nil { if it.iter == nil {
it.result = nil it.result = nil
@ -132,6 +139,9 @@ func (it *Iterator) Next() bool {
return false return false
} }
if bytes.HasPrefix(it.iter.Key(), it.nextPrefix) { if bytes.HasPrefix(it.iter.Key(), it.nextPrefix) {
if !it.isLiveValue(it.iter.Value()) {
return it.Next()
}
out := make([]byte, len(it.iter.Key())) out := make([]byte, len(it.iter.Key()))
copy(out, it.iter.Key()) copy(out, it.iter.Key())
it.result = Token(out) it.result = Token(out)
@ -173,7 +183,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object: case quad.Object:
return 2*hashSize + 2 return 2*hashSize + 2
case quad.Label: case quad.Label:
return -1 return 3*hashSize + 2
} }
} }
if bytes.Equal(prefix, []byte("po")) { if bytes.Equal(prefix, []byte("po")) {
@ -185,7 +195,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object: case quad.Object:
return hashSize + 2 return hashSize + 2
case quad.Label: case quad.Label:
return -1 return hashSize + 2
} }
} }
if bytes.Equal(prefix, []byte("os")) { if bytes.Equal(prefix, []byte("os")) {
@ -197,7 +207,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object: case quad.Object:
return 2 return 2
case quad.Label: case quad.Label:
return -1 return 3*hashSize + 2
} }
} }
if bytes.Equal(prefix, []byte("cp")) { if bytes.Equal(prefix, []byte("cp")) {
@ -221,17 +231,18 @@ func (it *Iterator) Contains(v graph.Value) bool {
return false return false
} }
offset := PositionOf(val[0:2], it.dir, it.qs) offset := PositionOf(val[0:2], it.dir, it.qs)
if offset != -1 {
if bytes.HasPrefix(val[offset:], it.checkId[1:]) { if bytes.HasPrefix(val[offset:], it.checkId[1:]) {
// You may ask, why don't we check to see if it's a valid (not deleted) triple
// again?
//
// We've already done that -- in order to get the graph.Value token in the
// first place, we had to have done the check already; it came from a Next().
//
// However, if it ever starts coming from somewhere else, it'll be more
// efficient to change the interface of the graph.Value for LevelDB to a
// struct with a flag for isValid, to save another random read.
return true return true
} }
} else {
nameForDir := it.qs.Quad(v).Get(it.dir)
hashForDir := it.qs.ValueOf(nameForDir).(Token)
if bytes.Equal(hashForDir, it.checkId) {
return true
}
}
return false return false
} }

View file

@ -24,6 +24,7 @@ import (
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
"github.com/google/cayley/writer"
) )
func makeTripleSet() []quad.Quad { func makeTripleSet() []quad.Quad {
@ -135,7 +136,8 @@ func TestLoadDatabase(t *testing.T) {
t.Error("Failed to create leveldb TripleStore.") t.Error("Failed to create leveldb TripleStore.")
} }
qs.AddTriple(quad.Quad{"Something", "points_to", "Something Else", "context"}) w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuad(quad.Quad{"Something", "points_to", "Something Else", "context"})
for _, pq := range []string{"Something", "points_to", "Something Else", "context"} { for _, pq := range []string{"Something", "points_to", "Something Else", "context"} {
if got := qs.NameOf(qs.ValueOf(pq)); got != pq { if got := qs.NameOf(qs.ValueOf(pq)); got != pq {
t.Errorf("Failed to roundtrip %q, got:%q expect:%q", pq, got, pq) t.Errorf("Failed to roundtrip %q, got:%q expect:%q", pq, got, pq)
@ -154,13 +156,14 @@ func TestLoadDatabase(t *testing.T) {
if qs == nil || err != nil { if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.") t.Error("Failed to create leveldb TripleStore.")
} }
w, _ = writer.NewSingleReplication(qs, nil)
ts2, didConvert := qs.(*TripleStore) ts2, didConvert := qs.(*TripleStore)
if !didConvert { if !didConvert {
t.Errorf("Could not convert from generic to LevelDB TripleStore") t.Errorf("Could not convert from generic to LevelDB TripleStore")
} }
qs.AddTripleSet(makeTripleSet()) w.AddQuadSet(makeTripleSet())
if s := qs.Size(); s != 11 { if s := qs.Size(); s != 11 {
t.Errorf("Unexpected triplestore size, got:%d expect:11", s) t.Errorf("Unexpected triplestore size, got:%d expect:11", s)
} }
@ -168,7 +171,7 @@ func TestLoadDatabase(t *testing.T) {
t.Errorf("Unexpected triplestore size, got:%d expect:5", s) t.Errorf("Unexpected triplestore size, got:%d expect:5", s)
} }
qs.RemoveTriple(quad.Quad{"A", "follows", "B", ""}) w.RemoveQuad(quad.Quad{"A", "follows", "B", ""})
if s := qs.Size(); s != 10 { if s := qs.Size(); s != 10 {
t.Errorf("Unexpected triplestore size after RemoveTriple, got:%d expect:10", s) t.Errorf("Unexpected triplestore size after RemoveTriple, got:%d expect:10", s)
} }
@ -196,7 +199,9 @@ func TestIterator(t *testing.T) {
if qs == nil || err != nil { if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.") t.Error("Failed to create leveldb TripleStore.")
} }
qs.AddTripleSet(makeTripleSet())
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
var it graph.Iterator var it graph.Iterator
it = qs.NodesAllIterator() it = qs.NodesAllIterator()
@ -291,7 +296,8 @@ func TestSetIterator(t *testing.T) {
} }
defer qs.Close() defer qs.Close()
qs.AddTripleSet(makeTripleSet()) w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
expect := []quad.Quad{ expect := []quad.Quad{
{"C", "follows", "B", ""}, {"C", "follows", "B", ""},
@ -403,7 +409,9 @@ func TestOptimize(t *testing.T) {
if qs == nil || err != nil { if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.") t.Error("Failed to create leveldb TripleStore.")
} }
qs.AddTripleSet(makeTripleSet())
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
// With an linksto-fixed pair // With an linksto-fixed pair
fixed := qs.FixedIterator() fixed := qs.FixedIterator()

View file

@ -19,6 +19,7 @@ import (
"crypto/sha1" "crypto/sha1"
"encoding/binary" "encoding/binary"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"hash" "hash"
"sync" "sync"
@ -62,6 +63,7 @@ type TripleStore struct {
path string path string
open bool open bool
size int64 size int64
horizon int64
writeopts *opt.WriteOptions writeopts *opt.WriteOptions
readopts *opt.ReadOptions readopts *opt.ReadOptions
} }
@ -85,6 +87,7 @@ func createNewLevelDB(path string, _ graph.Options) error {
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) { func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) {
var qs TripleStore var qs TripleStore
var err error
qs.path = path qs.path = path
cache_size := DefaultCacheSize cache_size := DefaultCacheSize
if val, ok := options.IntKey("cache_size_mb"); ok { if val, ok := options.IntKey("cache_size_mb"); ok {
@ -106,11 +109,15 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro
qs.readopts = &opt.ReadOptions{} qs.readopts = &opt.ReadOptions{}
db, err := leveldb.OpenFile(qs.path, qs.dbOpts) db, err := leveldb.OpenFile(qs.path, qs.dbOpts)
if err != nil { if err != nil {
panic("Error, couldn't open! " + err.Error()) glog.Errorln("Error, couldn't open! ", err)
return nil, err
} }
qs.db = db qs.db = db
glog.Infoln(qs.GetStats()) glog.Infoln(qs.GetStats())
qs.getSize() err = qs.getMetadata()
if err != nil {
return nil, err
}
return &qs, nil return &qs, nil
} }
@ -128,24 +135,25 @@ func (qs *TripleStore) Size() int64 {
return qs.size return qs.size
} }
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { func (qs *TripleStore) Horizon() int64 {
return qs.horizon
}
// createDeltaKeyFor returns the database key under which delta d is stored
// in the log: the byte 'd' followed by d.ID rendered as lowercase hex,
// zero-padded to a width of 18. The fixed width keeps byte-wise key order
// equal to numeric ID order for non-negative IDs.
//
// The receiver is named qs to match the other TripleStore methods.
func (qs *TripleStore) createDeltaKeyFor(d graph.Delta) []byte {
	return []byte(fmt.Sprintf("d%018x", d.ID))
}
func (qs *TripleStore) createKeyFor(d [4]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(hashSize*3)) key := make([]byte, 0, 2+(hashSize*3))
// TODO(kortschak) Remove dependence on String() method. // TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key key = append(key, qs.convertStringToByteHash(triple.Get(d[3]))...)
}
func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(hashSize*4))
// TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key return key
} }
@ -156,76 +164,98 @@ func (qs *TripleStore) createValueKeyFor(s string) []byte {
return key return key
} }
func (qs *TripleStore) AddTriple(t quad.Quad) { type IndexEntry struct {
batch := &leveldb.Batch{} quad.Quad
qs.buildWrite(batch, t) History []int64
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Errorf("Couldn't write to DB for triple %s.", t)
return
}
qs.size++
} }
// Short hand for direction permutations. // Short hand for direction permutations.
var ( var (
spo = [3]quad.Direction{quad.Subject, quad.Predicate, quad.Object} spo = [4]quad.Direction{quad.Subject, quad.Predicate, quad.Object, quad.Label}
osp = [3]quad.Direction{quad.Object, quad.Subject, quad.Predicate} osp = [4]quad.Direction{quad.Object, quad.Subject, quad.Predicate, quad.Label}
pos = [3]quad.Direction{quad.Predicate, quad.Object, quad.Subject} pos = [4]quad.Direction{quad.Predicate, quad.Object, quad.Subject, quad.Label}
pso = [3]quad.Direction{quad.Predicate, quad.Subject, quad.Object} cps = [4]quad.Direction{quad.Label, quad.Predicate, quad.Subject, quad.Object}
) )
func (qs *TripleStore) RemoveTriple(t quad.Quad) { func (qs *TripleStore) ApplyDeltas(deltas []graph.Delta) error {
_, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Couldn't access DB to confirm deletion")
return
}
if err == leveldb.ErrNotFound {
// No such triple in the database, forget about it.
return
}
batch := &leveldb.Batch{} batch := &leveldb.Batch{}
batch.Delete(qs.createKeyFor(spo, t)) resizeMap := make(map[string]int64)
batch.Delete(qs.createKeyFor(osp, t)) size_change := int64(0)
batch.Delete(qs.createKeyFor(pos, t)) for _, d := range deltas {
qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch) bytes, err := json.Marshal(d)
qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch)
qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch)
if t.Get(quad.Label) != "" {
batch.Delete(qs.createProvKeyFor(pso, t))
qs.UpdateValueKeyBy(t.Get(quad.Label), -1, batch)
}
err = qs.db.Write(batch, nil)
if err != nil { if err != nil {
glog.Errorf("Couldn't delete triple %s.", t) return err
return
} }
qs.size-- batch.Put(qs.createDeltaKeyFor(d), bytes)
err = qs.buildQuadWrite(batch, d.Quad, d.ID, d.Action == graph.Add)
if err != nil {
return err
}
delta := int64(1)
if d.Action == graph.Delete {
delta = int64(-1)
}
resizeMap[d.Quad.Subject] += delta
resizeMap[d.Quad.Predicate] += delta
resizeMap[d.Quad.Object] += delta
if d.Quad.Label != "" {
resizeMap[d.Quad.Label] += delta
}
size_change += delta
qs.horizon = d.ID
}
for k, v := range resizeMap {
if v != 0 {
err := qs.UpdateValueKeyBy(k, v, batch)
if err != nil {
return err
}
}
}
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Error("Couldn't write to DB for tripleset.")
return err
}
qs.size += size_change
return nil
} }
func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) { func (qs *TripleStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error {
bytes, err := json.Marshal(t) var entry IndexEntry
data, err := qs.db.Get(qs.createKeyFor(spo, q), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Couldn't access DB to prepare index: ", err)
return err
}
if err == nil {
// We got something.
err = json.Unmarshal(data, &entry)
if err != nil { if err != nil {
glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err) return err
return
} }
batch.Put(qs.createKeyFor(spo, t), bytes) } else {
batch.Put(qs.createKeyFor(osp, t), bytes) entry.Quad = q
batch.Put(qs.createKeyFor(pos, t), bytes)
if t.Get(quad.Label) != "" {
batch.Put(qs.createProvKeyFor(pso, t), bytes)
} }
} entry.History = append(entry.History, id)
func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) { if isAdd && len(entry.History)%2 == 0 {
qs.buildTripleWrite(batch, t) glog.Error("Entry History is out of sync for", entry)
qs.UpdateValueKeyBy(t.Get(quad.Subject), 1, nil) return errors.New("Odd index history")
qs.UpdateValueKeyBy(t.Get(quad.Predicate), 1, nil)
qs.UpdateValueKeyBy(t.Get(quad.Object), 1, nil)
if t.Get(quad.Label) != "" {
qs.UpdateValueKeyBy(t.Get(quad.Label), 1, nil)
} }
bytes, err := json.Marshal(entry)
if err != nil {
glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
return err
}
batch.Put(qs.createKeyFor(spo, q), bytes)
batch.Put(qs.createKeyFor(osp, q), bytes)
batch.Put(qs.createKeyFor(pos, q), bytes)
if q.Get(quad.Label) != "" {
batch.Put(qs.createKeyFor(cps, q), bytes)
}
return nil
} }
type ValueData struct { type ValueData struct {
@ -233,15 +263,15 @@ type ValueData struct {
Size int64 Size int64
} }
func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.Batch) { func (qs *TripleStore) UpdateValueKeyBy(name string, amount int64, batch *leveldb.Batch) error {
value := &ValueData{name, int64(amount)} value := &ValueData{name, amount}
key := qs.createValueKeyFor(name) key := qs.createValueKeyFor(name)
b, err := qs.db.Get(key, qs.readopts) b, err := qs.db.Get(key, qs.readopts)
// Error getting the node from the database. // Error getting the node from the database.
if err != nil && err != leveldb.ErrNotFound { if err != nil && err != leveldb.ErrNotFound {
glog.Errorf("Error reading Value %s from the DB.", name) glog.Errorf("Error reading Value %s from the DB.", name)
return return err
} }
// Node exists in the database -- unmarshal and update. // Node exists in the database -- unmarshal and update.
@ -249,58 +279,28 @@ func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.
err = json.Unmarshal(b, value) err = json.Unmarshal(b, value)
if err != nil { if err != nil {
glog.Errorf("Error: couldn't reconstruct value: %v", err) glog.Errorf("Error: couldn't reconstruct value: %v", err)
return return err
} }
value.Size += int64(amount) value.Size += amount
} }
// Are we deleting something? // Are we deleting something?
if amount < 0 {
if value.Size <= 0 { if value.Size <= 0 {
if batch == nil { value.Size = 0
qs.db.Delete(key, qs.writeopts)
} else {
batch.Delete(key)
}
return
}
} }
// Repackage and rewrite. // Repackage and rewrite.
bytes, err := json.Marshal(&value) bytes, err := json.Marshal(&value)
if err != nil { if err != nil {
glog.Errorf("Couldn't write to buffer for value %s: %s", name, err) glog.Errorf("Couldn't write to buffer for value %s: %s", name, err)
return return err
} }
if batch == nil { if batch == nil {
qs.db.Put(key, bytes, qs.writeopts) qs.db.Put(key, bytes, qs.writeopts)
} else { } else {
batch.Put(key, bytes) batch.Put(key, bytes)
} }
} return nil
func (qs *TripleStore) AddTripleSet(t_s []quad.Quad) {
batch := &leveldb.Batch{}
newTs := len(t_s)
resizeMap := make(map[string]int)
for _, t := range t_s {
qs.buildTripleWrite(batch, t)
resizeMap[t.Subject]++
resizeMap[t.Predicate]++
resizeMap[t.Object]++
if t.Label != "" {
resizeMap[t.Label]++
}
}
for k, v := range resizeMap {
qs.UpdateValueKeyBy(k, v, batch)
}
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Error("Couldn't write to DB for tripleset.")
return
}
qs.size += int64(newTs)
} }
func (qs *TripleStore) Close() { func (qs *TripleStore) Close() {
@ -314,6 +314,16 @@ func (qs *TripleStore) Close() {
} else { } else {
glog.Errorf("Couldn't convert size before closing!") glog.Errorf("Couldn't convert size before closing!")
} }
buf.Reset()
err = binary.Write(buf, binary.LittleEndian, qs.horizon)
if err == nil {
werr := qs.db.Put([]byte("__horizon"), buf.Bytes(), qs.writeopts)
if werr != nil {
glog.Error("Couldn't write horizon before closing!")
}
} else {
glog.Errorf("Couldn't convert horizon before closing!")
}
qs.db.Close() qs.db.Close()
qs.open = false qs.open = false
} }
@ -386,23 +396,34 @@ func (qs *TripleStore) SizeOf(k graph.Value) int64 {
return int64(qs.valueData(k.(Token)).Size) return int64(qs.valueData(k.(Token)).Size)
} }
func (qs *TripleStore) getSize() { func (qs *TripleStore) getInt64ForKey(key string, empty int64) (int64, error) {
var size int64 var out int64
b, err := qs.db.Get([]byte("__size"), qs.readopts) b, err := qs.db.Get([]byte(key), qs.readopts)
if err != nil && err != leveldb.ErrNotFound { if err != nil && err != leveldb.ErrNotFound {
panic("Couldn't read size " + err.Error()) glog.Errorln("Couldn't read " + key + ": " + err.Error())
return 0, err
} }
if err == leveldb.ErrNotFound { if err == leveldb.ErrNotFound {
// Must be a new database. Cool // Must be a new database. Cool
qs.size = 0 return empty, nil
return
} }
buf := bytes.NewBuffer(b) buf := bytes.NewBuffer(b)
err = binary.Read(buf, binary.LittleEndian, &size) err = binary.Read(buf, binary.LittleEndian, &out)
if err != nil { if err != nil {
glog.Errorln("Error: couldn't parse size") glog.Errorln("Error: couldn't parse", key)
return 0, err
} }
qs.size = size return out, nil
}
func (qs *TripleStore) getMetadata() error {
var err error
qs.size, err = qs.getInt64ForKey("__size", 0)
if err != nil {
return err
}
qs.horizon, err = qs.getInt64ForKey("__horizon", 0)
return err
} }
func (qs *TripleStore) SizeOfPrefix(pre []byte) (int64, error) { func (qs *TripleStore) SizeOfPrefix(pre []byte) (int64, error) {

View file

@ -24,19 +24,22 @@ type AllIterator struct {
ts *TripleStore ts *TripleStore
} }
func NewMemstoreAllIterator(ts *TripleStore) *AllIterator { type NodesAllIterator AllIterator
var out AllIterator type QuadsAllIterator AllIterator
func NewMemstoreNodesAllIterator(ts *TripleStore) *NodesAllIterator {
var out NodesAllIterator
out.Int64 = *iterator.NewInt64(1, ts.idCounter-1) out.Int64 = *iterator.NewInt64(1, ts.idCounter-1)
out.ts = ts out.ts = ts
return &out return &out
} }
// No subiterators. // No subiterators.
func (it *AllIterator) SubIterators() []graph.Iterator { func (it *NodesAllIterator) SubIterators() []graph.Iterator {
return nil return nil
} }
func (it *AllIterator) Next() bool { func (it *NodesAllIterator) Next() bool {
if !it.Int64.Next() { if !it.Int64.Next() {
return false return false
} }
@ -46,3 +49,21 @@ func (it *AllIterator) Next() bool {
} }
return true return true
} }
// NewMemstoreQuadsAllIterator builds an iterator over the quad log of ts.
// The underlying Int64 iterator is created over 1 through
// ts.quadIdCounter-1; index 0 is not part of that range.
func NewMemstoreQuadsAllIterator(ts *TripleStore) *QuadsAllIterator {
	return &QuadsAllIterator{
		Int64: *iterator.NewInt64(1, ts.quadIdCounter-1),
		ts:    ts,
	}
}
// Next advances to the next log entry that is still live. An entry is
// skipped when it has been marked as deleted (DeletedBy != 0) or when the
// entry itself records a delete action. It returns false once the
// underlying Int64 iterator is exhausted.
func (qit *QuadsAllIterator) Next() bool {
	for qit.Int64.Next() {
		id := qit.Int64.Result().(int64)
		entry := &qit.ts.log[id]
		if entry.DeletedBy == 0 && entry.Action != graph.Delete {
			return true
		}
	}
	return false
}

972
graph/memstore/b/keys.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,396 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package b
import (
"math"
"runtime/debug"
"testing"
"github.com/cznic/mathutil"
)
// rng returns a new mathutil.FC32 generator constructed with the bounds
// math.MinInt32/4 and math.MaxInt32/4. It panics if the generator cannot
// be created, since the benchmarks cannot run without it.
func rng() *mathutil.FC32 {
	const (
		lo = math.MinInt32 / 4
		hi = math.MaxInt32 / 4
	)
	gen, err := mathutil.NewFC32(lo, hi, false)
	if err != nil {
		panic(err)
	}
	return gen
}
// cmp is the key comparator handed to TreeNew. It returns a negative value
// when a < b, zero when a == b and a positive value when a > b.
//
// The comparison is done explicitly rather than by subtraction: a - b can
// overflow int64 for operands that are far apart, and converting the
// difference to int truncates on platforms where int is 32 bits. Either
// case could yield a result with the wrong sign.
func cmp(a, b int64) int {
	switch {
	case a < b:
		return -1
	case a > b:
		return 1
	}
	return 0
}
// BenchmarkSetSeq1e3 runs benchmarkSetSeq with n = 1e3.
func BenchmarkSetSeq1e3(b *testing.B) { benchmarkSetSeq(b, 1e3) }

// BenchmarkSetSeq1e4 runs benchmarkSetSeq with n = 1e4.
func BenchmarkSetSeq1e4(b *testing.B) { benchmarkSetSeq(b, 1e4) }

// BenchmarkSetSeq1e5 runs benchmarkSetSeq with n = 1e5.
func BenchmarkSetSeq1e5(b *testing.B) { benchmarkSetSeq(b, 1e5) }

// BenchmarkSetSeq1e6 runs benchmarkSetSeq with n = 1e6.
func BenchmarkSetSeq1e6(b *testing.B) { benchmarkSetSeq(b, 1e6) }
// benchmarkSetSeq times inserting the keys 0..n-1, in order, into a freshly
// created tree. Tree creation, the FreeOSMemory call and closing the tree
// all happen while the timer is stopped, so only the Set calls are measured.
func benchmarkSetSeq(b *testing.B, n int) {
	limit := int64(n)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		b.StopTimer()
		tree := TreeNew(cmp)
		debug.FreeOSMemory()
		b.StartTimer()

		for key := int64(0); key < limit; key++ {
			tree.Set(key, struct{}{})
		}

		b.StopTimer()
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkGetSeq1e3 runs benchmarkGetSeq with n = 1e3.
func BenchmarkGetSeq1e3(b *testing.B) { benchmarkGetSeq(b, 1e3) }

// BenchmarkGetSeq1e4 runs benchmarkGetSeq with n = 1e4.
func BenchmarkGetSeq1e4(b *testing.B) { benchmarkGetSeq(b, 1e4) }

// BenchmarkGetSeq1e5 runs benchmarkGetSeq with n = 1e5.
func BenchmarkGetSeq1e5(b *testing.B) { benchmarkGetSeq(b, 1e5) }

// BenchmarkGetSeq1e6 runs benchmarkGetSeq with n = 1e6.
func BenchmarkGetSeq1e6(b *testing.B) { benchmarkGetSeq(b, 1e6) }
// benchmarkGetSeq times looking up the keys 0..n-1, in order, in a tree
// that was populated with those keys before the timer was reset. The tree
// is closed after the timer has been stopped.
func benchmarkGetSeq(b *testing.B, n int) {
	limit := int64(n)
	tree := TreeNew(cmp)
	for key := int64(0); key < limit; key++ {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		for key := int64(0); key < limit; key++ {
			tree.Get(key)
		}
	}
	b.StopTimer()
	tree.Close()
}
// BenchmarkSetRnd1e3 runs benchmarkSetRnd with n = 1e3.
func BenchmarkSetRnd1e3(b *testing.B) { benchmarkSetRnd(b, 1e3) }

// BenchmarkSetRnd1e4 runs benchmarkSetRnd with n = 1e4.
func BenchmarkSetRnd1e4(b *testing.B) { benchmarkSetRnd(b, 1e4) }

// BenchmarkSetRnd1e5 runs benchmarkSetRnd with n = 1e5.
func BenchmarkSetRnd1e5(b *testing.B) { benchmarkSetRnd(b, 1e5) }

// BenchmarkSetRnd1e6 runs benchmarkSetRnd with n = 1e6.
func BenchmarkSetRnd1e6(b *testing.B) { benchmarkSetRnd(b, 1e6) }
// benchmarkSetRnd times inserting n keys drawn from rng() into a freshly
// created tree. The keys are generated once, before the timer is reset, and
// reused for every iteration. Tree creation, the FreeOSMemory call and
// closing the tree happen with the timer stopped.
func benchmarkSetRnd(b *testing.B, n int) {
	gen := rng()
	keys := make([]int, n)
	for idx := range keys {
		keys[idx] = gen.Next()
	}

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		b.StopTimer()
		tree := TreeNew(cmp)
		debug.FreeOSMemory()
		b.StartTimer()

		for _, key := range keys {
			tree.Set(int64(key), struct{}{})
		}

		b.StopTimer()
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkGetRnd1e3 runs benchmarkGetRnd with n = 1e3.
func BenchmarkGetRnd1e3(b *testing.B) { benchmarkGetRnd(b, 1e3) }

// BenchmarkGetRnd1e4 runs benchmarkGetRnd with n = 1e4.
func BenchmarkGetRnd1e4(b *testing.B) { benchmarkGetRnd(b, 1e4) }

// BenchmarkGetRnd1e5 runs benchmarkGetRnd with n = 1e5.
func BenchmarkGetRnd1e5(b *testing.B) { benchmarkGetRnd(b, 1e5) }

// BenchmarkGetRnd1e6 runs benchmarkGetRnd with n = 1e6.
func BenchmarkGetRnd1e6(b *testing.B) { benchmarkGetRnd(b, 1e6) }
// benchmarkGetRnd times looking up n keys drawn from rng() in a tree that
// was populated with those same keys before the timer was reset. Lookups
// are issued in the order the keys were generated. The tree is closed once
// the timer has been stopped.
func benchmarkGetRnd(b *testing.B, n int) {
	tree := TreeNew(cmp)
	gen := rng()
	keys := make([]int64, n)
	for idx := range keys {
		keys[idx] = int64(gen.Next())
	}
	for _, key := range keys {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		for _, key := range keys {
			tree.Get(key)
		}
	}
	b.StopTimer()
	tree.Close()
}
// BenchmarkDelSeq1e3 runs benchmarkDelSeq with n = 1e3.
func BenchmarkDelSeq1e3(b *testing.B) { benchmarkDelSeq(b, 1e3) }

// BenchmarkDelSeq1e4 runs benchmarkDelSeq with n = 1e4.
func BenchmarkDelSeq1e4(b *testing.B) { benchmarkDelSeq(b, 1e4) }

// BenchmarkDelSeq1e5 runs benchmarkDelSeq with n = 1e5.
func BenchmarkDelSeq1e5(b *testing.B) { benchmarkDelSeq(b, 1e5) }

// BenchmarkDelSeq1e6 runs benchmarkDelSeq with n = 1e6.
func BenchmarkDelSeq1e6(b *testing.B) { benchmarkDelSeq(b, 1e6) }
// benchmarkDelSeq times deleting the keys 0..n-1, in order, from a tree
// that was populated with those same keys. Building and populating the
// tree and the FreeOSMemory call run with the timer stopped, so only the
// Delete calls are measured.
func benchmarkDelSeq(b *testing.B, n int) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		r := TreeNew(cmp)
		for j := int64(0); j < int64(n); j++ {
			r.Set(j, struct{}{})
		}
		debug.FreeOSMemory()
		b.StartTimer()
		for j := int64(0); j < int64(n); j++ {
			r.Delete(j)
		}
		// Stop the clock and release the tree at the end of every
		// iteration, matching benchmarkDelRnd; previously the tree was
		// never closed.
		b.StopTimer()
		r.Close()
	}
	b.StopTimer()
}
// BenchmarkDelRnd1e3 runs benchmarkDelRnd with n = 1e3.
func BenchmarkDelRnd1e3(b *testing.B) { benchmarkDelRnd(b, 1e3) }

// BenchmarkDelRnd1e4 runs benchmarkDelRnd with n = 1e4.
func BenchmarkDelRnd1e4(b *testing.B) { benchmarkDelRnd(b, 1e4) }

// BenchmarkDelRnd1e5 runs benchmarkDelRnd with n = 1e5.
func BenchmarkDelRnd1e5(b *testing.B) { benchmarkDelRnd(b, 1e5) }

// BenchmarkDelRnd1e6 runs benchmarkDelRnd with n = 1e6.
func BenchmarkDelRnd1e6(b *testing.B) { benchmarkDelRnd(b, 1e6) }
// benchmarkDelRnd times deleting n keys drawn from rng() from a tree that
// was populated with those same keys. The keys are generated once up front.
// Building and populating the tree, the FreeOSMemory call and closing the
// tree run with the timer stopped, so only the Delete calls are measured.
func benchmarkDelRnd(b *testing.B, n int) {
	gen := rng()
	keys := make([]int64, n)
	for idx := range keys {
		keys[idx] = int64(gen.Next())
	}

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		b.StopTimer()
		tree := TreeNew(cmp)
		for _, key := range keys {
			tree.Set(key, struct{}{})
		}
		debug.FreeOSMemory()
		b.StartTimer()

		for _, key := range keys {
			tree.Delete(key)
		}

		b.StopTimer()
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkSeekSeq1e3 runs benchmarkSeekSeq with n = 1e3.
func BenchmarkSeekSeq1e3(b *testing.B) { benchmarkSeekSeq(b, 1e3) }

// BenchmarkSeekSeq1e4 runs benchmarkSeekSeq with n = 1e4.
func BenchmarkSeekSeq1e4(b *testing.B) { benchmarkSeekSeq(b, 1e4) }

// BenchmarkSeekSeq1e5 runs benchmarkSeekSeq with n = 1e5.
func BenchmarkSeekSeq1e5(b *testing.B) { benchmarkSeekSeq(b, 1e5) }

// BenchmarkSeekSeq1e6 runs benchmarkSeekSeq with n = 1e6.
func BenchmarkSeekSeq1e6(b *testing.B) { benchmarkSeekSeq(b, 1e6) }
// benchmarkSeekSeq times seeking to each of the keys 0..n-1, in order, in a
// tree populated with those keys, closing the returned enumerator after
// every seek. A new tree is built for each iteration with the timer
// stopped, and closed again with the timer stopped.
func benchmarkSeekSeq(b *testing.B, n int) {
	limit := int64(n)
	for iter := 0; iter < b.N; iter++ {
		b.StopTimer()
		tree := TreeNew(cmp)
		for key := int64(0); key < limit; key++ {
			tree.Set(key, struct{}{})
		}
		debug.FreeOSMemory()
		b.StartTimer()

		for key := int64(0); key < limit; key++ {
			enum, _ := tree.Seek(key)
			enum.Close()
		}

		b.StopTimer()
		tree.Close()
	}
	b.StopTimer()
}
// BenchmarkSeekRnd1e3 runs benchmarkSeekRnd with n = 1e3.
func BenchmarkSeekRnd1e3(b *testing.B) { benchmarkSeekRnd(b, 1e3) }

// BenchmarkSeekRnd1e4 runs benchmarkSeekRnd with n = 1e4.
func BenchmarkSeekRnd1e4(b *testing.B) { benchmarkSeekRnd(b, 1e4) }

// BenchmarkSeekRnd1e5 runs benchmarkSeekRnd with n = 1e5.
func BenchmarkSeekRnd1e5(b *testing.B) { benchmarkSeekRnd(b, 1e5) }

// BenchmarkSeekRnd1e6 runs benchmarkSeekRnd with n = 1e6.
func BenchmarkSeekRnd1e6(b *testing.B) { benchmarkSeekRnd(b, 1e6) }
// benchmarkSeekRnd times seeking to each of n keys drawn from rng() in a
// tree that was populated with those keys before the timer was reset. The
// enumerator returned by every seek is closed immediately. The tree is
// closed once the timer has been stopped.
func benchmarkSeekRnd(b *testing.B, n int) {
	tree := TreeNew(cmp)
	gen := rng()
	keys := make([]int64, n)
	for idx := range keys {
		keys[idx] = int64(gen.Next())
	}
	for _, key := range keys {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		for _, key := range keys {
			enum, _ := tree.Seek(key)
			enum.Close()
		}
	}
	b.StopTimer()
	tree.Close()
}
// BenchmarkNext1e3 runs benchmarkNext with n = 1e3.
func BenchmarkNext1e3(b *testing.B) { benchmarkNext(b, 1e3) }

// BenchmarkNext1e4 runs benchmarkNext with n = 1e4.
func BenchmarkNext1e4(b *testing.B) { benchmarkNext(b, 1e4) }

// BenchmarkNext1e5 runs benchmarkNext with n = 1e5.
func BenchmarkNext1e5(b *testing.B) { benchmarkNext(b, 1e5) }

// BenchmarkNext1e6 runs benchmarkNext with n = 1e6.
func BenchmarkNext1e6(b *testing.B) { benchmarkNext(b, 1e6) }
// benchmarkNext times a full forward walk over a tree holding the keys
// 0..n-1: it seeks to the first element and calls Next until it reports an
// error. The benchmark fails if SeekFirst errors or if the number of
// elements visited differs from n. The tree is closed after the timer has
// been stopped.
func benchmarkNext(b *testing.B, n int) {
	tree := TreeNew(cmp)
	for key := int64(0); key < int64(n); key++ {
		tree.Set(key, struct{}{})
	}
	debug.FreeOSMemory()

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		enum, err := tree.SeekFirst()
		if err != nil {
			b.Fatal(err)
		}
		// Count elements until Next returns a non-nil error.
		visited := 0
		for _, _, err = enum.Next(); err == nil; _, _, err = enum.Next() {
			visited++
		}
		if visited != n {
			b.Fatal(visited)
		}
	}
	b.StopTimer()
	tree.Close()
}
// BenchmarkPrev1e3 runs benchmarkPrev with n = 1e3.
func BenchmarkPrev1e3(b *testing.B) { benchmarkPrev(b, 1e3) }

// BenchmarkPrev1e4 runs benchmarkPrev with n = 1e4.
func BenchmarkPrev1e4(b *testing.B) { benchmarkPrev(b, 1e4) }

// BenchmarkPrev1e5 runs benchmarkPrev with n = 1e5.
func BenchmarkPrev1e5(b *testing.B) { benchmarkPrev(b, 1e5) }

// BenchmarkPrev1e6 runs benchmarkPrev with n = 1e6.
func BenchmarkPrev1e6(b *testing.B) { benchmarkPrev(b, 1e6) }
// benchmarkPrev times a full backward walk over a tree holding the keys
// 0..n-1: it seeks to the last element and calls Prev until it reports an
// error. The benchmark fails if SeekLast errors or if the number of
// elements visited differs from n.
func benchmarkPrev(b *testing.B, n int) {
	t := TreeNew(cmp)
	for i := int64(0); i < int64(n); i++ {
		t.Set(i, struct{}{})
	}
	debug.FreeOSMemory()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		en, err := t.SeekLast()
		if err != nil {
			b.Fatal(err)
		}
		m := 0
		for {
			if _, _, err = en.Prev(); err != nil {
				break
			}
			m++
		}
		if m != n {
			b.Fatal(m)
		}
	}
	// Stop the clock before releasing the tree, mirroring benchmarkNext;
	// previously the tree was never closed here.
	b.StopTimer()
	t.Close()
}

View file

@ -19,46 +19,35 @@ import (
"math" "math"
"strings" "strings"
"github.com/petar/GoLLRB/llrb"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
) )
type Iterator struct { type Iterator struct {
uid uint64 uid uint64
ts *TripleStore
tags graph.Tagger tags graph.Tagger
tree *llrb.LLRB tree *b.Tree
iter *b.Enumerator
data string data string
isRunning bool
iterLast Int64
result graph.Value result graph.Value
} }
type Int64 int64 func cmp(a, b int64) int {
return int(a - b)
func (i Int64) Less(than llrb.Item) bool {
return i < than.(Int64)
} }
func IterateOne(tree *llrb.LLRB, last Int64) Int64 { func NewIterator(tree *b.Tree, data string, ts *TripleStore) *Iterator {
var next Int64 iter, err := tree.SeekFirst()
tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool { if err != nil {
if i.(Int64) == last { iter = nil
return true
} else {
next = i.(Int64)
return false
} }
})
return next
}
func NewLlrbIterator(tree *llrb.LLRB, data string) *Iterator {
return &Iterator{ return &Iterator{
uid: iterator.NextUID(), uid: iterator.NextUID(),
ts: ts,
tree: tree, tree: tree,
iterLast: Int64(-1), iter: iter,
data: data, data: data,
} }
} }
@ -68,7 +57,11 @@ func (it *Iterator) UID() uint64 {
} }
func (it *Iterator) Reset() { func (it *Iterator) Reset() {
it.iterLast = Int64(-1) var err error
it.iter, err = it.tree.SeekFirst()
if err != nil {
it.iter = nil
}
} }
func (it *Iterator) Tagger() *graph.Tagger { func (it *Iterator) Tagger() *graph.Tagger {
@ -86,20 +79,53 @@ func (it *Iterator) TagResults(dst map[string]graph.Value) {
} }
func (it *Iterator) Clone() graph.Iterator { func (it *Iterator) Clone() graph.Iterator {
m := NewLlrbIterator(it.tree, it.data) var iter *b.Enumerator
if it.result != nil {
var ok bool
iter, ok = it.tree.Seek(it.result.(int64))
if !ok {
panic("value unexpectedly missing")
}
} else {
var err error
iter, err = it.tree.SeekFirst()
if err != nil {
iter = nil
}
}
m := &Iterator{
uid: iterator.NextUID(),
ts: it.ts,
tree: it.tree,
iter: iter,
data: it.data,
}
m.tags.CopyFrom(it) m.tags.CopyFrom(it)
return m return m
} }
func (it *Iterator) Close() {} func (it *Iterator) Close() {}
// checkValid reports whether the log entry at index is still live, i.e. its
// DeletedBy field is zero.
func (it *Iterator) checkValid(index int64) bool {
	entry := &it.ts.log[index]
	return entry.DeletedBy == 0
}
func (it *Iterator) Next() bool { func (it *Iterator) Next() bool {
graph.NextLogIn(it) graph.NextLogIn(it)
if it.tree.Max() == nil || it.result == int64(it.tree.Max().(Int64)) {
if it.iter == nil {
return graph.NextLogOut(it, nil, false) return graph.NextLogOut(it, nil, false)
} }
it.iterLast = IterateOne(it.tree, it.iterLast) result, _, err := it.iter.Next()
it.result = int64(it.iterLast) if err != nil {
return graph.NextLogOut(it, nil, false)
}
if !it.checkValid(result) {
return it.Next()
}
it.result = result
return graph.NextLogOut(it, it.result, true) return graph.NextLogOut(it, it.result, true)
} }
@ -126,7 +152,7 @@ func (it *Iterator) Size() (int64, bool) {
func (it *Iterator) Contains(v graph.Value) bool { func (it *Iterator) Contains(v graph.Value) bool {
graph.ContainsLogIn(it, v) graph.ContainsLogIn(it, v)
if it.tree.Has(Int64(v.(int64))) { if _, ok := it.tree.Get(v.(int64)); ok {
it.result = v it.result = v
return graph.ContainsLogOut(it, v, true) return graph.ContainsLogOut(it, v, true)
} }
@ -141,7 +167,7 @@ func (it *Iterator) DebugString(indent int) string {
var memType graph.Type var memType graph.Type
func init() { func init() {
memType = graph.RegisterIterator("llrb") memType = graph.RegisterIterator("b+tree")
} }
func Type() graph.Type { return memType } func Type() graph.Type { return memType }

View file

@ -18,11 +18,11 @@ import (
"fmt" "fmt"
"github.com/barakmich/glog" "github.com/barakmich/glog"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
"github.com/petar/GoLLRB/llrb"
) )
func init() { func init() {
@ -31,129 +31,131 @@ func init() {
}, nil) }, nil)
} }
type TripleDirectionIndex struct { type QuadDirectionIndex struct {
subject map[int64]*llrb.LLRB index [4]map[int64]*b.Tree
predicate map[int64]*llrb.LLRB
object map[int64]*llrb.LLRB
label map[int64]*llrb.LLRB
} }
func NewTripleDirectionIndex() *TripleDirectionIndex { func NewQuadDirectionIndex() QuadDirectionIndex {
var tdi TripleDirectionIndex return QuadDirectionIndex{[...]map[int64]*b.Tree{
tdi.subject = make(map[int64]*llrb.LLRB) quad.Subject - 1: make(map[int64]*b.Tree),
tdi.predicate = make(map[int64]*llrb.LLRB) quad.Predicate - 1: make(map[int64]*b.Tree),
tdi.object = make(map[int64]*llrb.LLRB) quad.Object - 1: make(map[int64]*b.Tree),
tdi.label = make(map[int64]*llrb.LLRB) quad.Label - 1: make(map[int64]*b.Tree),
return &tdi }}
} }
func (tdi *TripleDirectionIndex) GetForDir(d quad.Direction) map[int64]*llrb.LLRB { func (qdi QuadDirectionIndex) Tree(d quad.Direction, id int64) *b.Tree {
switch d { if d < quad.Subject || d > quad.Label {
case quad.Subject:
return tdi.subject
case quad.Object:
return tdi.object
case quad.Predicate:
return tdi.predicate
case quad.Label:
return tdi.label
}
panic("illegal direction") panic("illegal direction")
}
func (tdi *TripleDirectionIndex) GetOrCreate(d quad.Direction, id int64) *llrb.LLRB {
directionIndex := tdi.GetForDir(d)
if _, ok := directionIndex[id]; !ok {
directionIndex[id] = llrb.New()
} }
return directionIndex[id] tree, ok := qdi.index[d-1][id]
if !ok {
tree = b.TreeNew(cmp)
qdi.index[d-1][id] = tree
}
return tree
} }
func (tdi *TripleDirectionIndex) Get(d quad.Direction, id int64) (*llrb.LLRB, bool) { func (qdi QuadDirectionIndex) Get(d quad.Direction, id int64) (*b.Tree, bool) {
directionIndex := tdi.GetForDir(d) if d < quad.Subject || d > quad.Label {
tree, exists := directionIndex[id] panic("illegal direction")
return tree, exists }
tree, ok := qdi.index[d-1][id]
return tree, ok
}
type LogEntry struct {
graph.Delta
DeletedBy int64
} }
type TripleStore struct { type TripleStore struct {
idCounter int64 idCounter int64
tripleIdCounter int64 quadIdCounter int64
idMap map[string]int64 idMap map[string]int64
revIdMap map[int64]string revIdMap map[int64]string
triples []quad.Quad log []LogEntry
size int64 size int64
index TripleDirectionIndex index QuadDirectionIndex
// vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree // vip_index map[string]map[int64]map[string]map[int64]*b.Tree
} }
func newTripleStore() *TripleStore { func newTripleStore() *TripleStore {
var ts TripleStore return &TripleStore{
ts.idMap = make(map[string]int64) idMap: make(map[string]int64),
ts.revIdMap = make(map[int64]string) revIdMap: make(map[int64]string),
ts.triples = make([]quad.Quad, 1, 200)
// Sentinel null triple so triple indices start at 1 // Sentinel null entry so indices start at 1
ts.triples[0] = quad.Quad{} log: make([]LogEntry, 1, 200),
ts.size = 1
ts.index = *NewTripleDirectionIndex()
ts.idCounter = 1
ts.tripleIdCounter = 1
return &ts
}
func (ts *TripleStore) AddTripleSet(triples []quad.Quad) { index: NewQuadDirectionIndex(),
for _, t := range triples { idCounter: 1,
ts.AddTriple(t) quadIdCounter: 1,
} }
} }
func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) { func (ts *TripleStore) ApplyDeltas(deltas []graph.Delta) error {
smallest := -1 for _, d := range deltas {
var smallest_tree *llrb.LLRB var err error
if d.Action == graph.Add {
err = ts.AddDelta(d)
} else {
err = ts.RemoveDelta(d)
}
if err != nil {
return err
}
}
return nil
}
const maxInt = int(^uint(0) >> 1)
func (ts *TripleStore) indexOf(t quad.Quad) (int64, bool) {
min := maxInt
var tree *b.Tree
for d := quad.Subject; d <= quad.Label; d++ { for d := quad.Subject; d <= quad.Label; d++ {
sid := t.Get(d) sid := t.Get(d)
if d == quad.Label && sid == "" { if d == quad.Label && sid == "" {
continue continue
} }
id, ok := ts.idMap[sid] id, ok := ts.idMap[sid]
// If we've never heard about a node, it most not exist // If we've never heard about a node, it must not exist
if !ok { if !ok {
return false, 0 return 0, false
} }
index, exists := ts.index.Get(d, id) index, ok := ts.index.Get(d, id)
if !exists { if !ok {
// If it's never been indexed in this direction, it can't exist. // If it's never been indexed in this direction, it can't exist.
return false, 0 return 0, false
} }
if smallest == -1 || index.Len() < smallest { if l := index.Len(); l < min {
smallest = index.Len() min, tree = l, index
smallest_tree = index
} }
} }
it := NewLlrbIterator(smallest_tree, "") it := NewIterator(tree, "", ts)
for it.Next() { for it.Next() {
val := it.Result() val := it.Result()
if t == ts.triples[val.(int64)] { if t == ts.log[val.(int64)].Quad {
return true, val.(int64) return val.(int64), true
} }
} }
return false, 0 return 0, false
} }
func (ts *TripleStore) AddTriple(t quad.Quad) { func (ts *TripleStore) AddDelta(d graph.Delta) error {
if exists, _ := ts.tripleExists(t); exists { if _, exists := ts.indexOf(d.Quad); exists {
return return graph.ErrQuadExists
} }
var tripleID int64 qid := ts.quadIdCounter
ts.triples = append(ts.triples, t) ts.log = append(ts.log, LogEntry{Delta: d})
tripleID = ts.tripleIdCounter
ts.size++ ts.size++
ts.tripleIdCounter++ ts.quadIdCounter++
for d := quad.Subject; d <= quad.Label; d++ { for dir := quad.Subject; dir <= quad.Label; dir++ {
sid := t.Get(d) sid := d.Quad.Get(dir)
if d == quad.Label && sid == "" { if dir == quad.Label && sid == "" {
continue continue
} }
if _, ok := ts.idMap[sid]; !ok { if _, ok := ts.idMap[sid]; !ok {
@ -163,87 +165,60 @@ func (ts *TripleStore) AddTriple(t quad.Quad) {
} }
} }
for d := quad.Subject; d <= quad.Label; d++ { for dir := quad.Subject; dir <= quad.Label; dir++ {
if d == quad.Label && t.Get(d) == "" { if dir == quad.Label && d.Quad.Get(dir) == "" {
continue continue
} }
id := ts.idMap[t.Get(d)] id := ts.idMap[d.Quad.Get(dir)]
tree := ts.index.GetOrCreate(d, id) tree := ts.index.Tree(dir, id)
tree.ReplaceOrInsert(Int64(tripleID)) tree.Set(qid, struct{}{})
} }
// TODO(barakmich): Add VIP indexing // TODO(barakmich): Add VIP indexing
return nil
} }
func (ts *TripleStore) RemoveTriple(t quad.Quad) { func (ts *TripleStore) RemoveDelta(d graph.Delta) error {
var tripleID int64 prevQuadID, exists := ts.indexOf(d.Quad)
var exists bool if !exists {
tripleID = 0 return graph.ErrQuadNotExist
if exists, tripleID = ts.tripleExists(t); !exists {
return
} }
ts.triples[tripleID] = quad.Quad{} quadID := ts.quadIdCounter
ts.log = append(ts.log, LogEntry{Delta: d})
ts.log[prevQuadID].DeletedBy = quadID
ts.size-- ts.size--
ts.quadIdCounter++
for d := quad.Subject; d <= quad.Label; d++ { return nil
if d == quad.Label && t.Get(d) == "" {
continue
}
id := ts.idMap[t.Get(d)]
tree := ts.index.GetOrCreate(d, id)
tree.Delete(Int64(tripleID))
}
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
continue
}
id, ok := ts.idMap[t.Get(d)]
if !ok {
continue
}
stillExists := false
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
continue
}
nodeTree := ts.index.GetOrCreate(d, id)
if nodeTree.Len() != 0 {
stillExists = true
break
}
}
if !stillExists {
delete(ts.idMap, t.Get(d))
delete(ts.revIdMap, id)
}
}
} }
func (ts *TripleStore) Quad(index graph.Value) quad.Quad { func (ts *TripleStore) Quad(index graph.Value) quad.Quad {
return ts.triples[index.(int64)] return ts.log[index.(int64)].Quad
} }
func (ts *TripleStore) TripleIterator(d quad.Direction, value graph.Value) graph.Iterator { func (ts *TripleStore) TripleIterator(d quad.Direction, value graph.Value) graph.Iterator {
index, ok := ts.index.Get(d, value.(int64)) index, ok := ts.index.Get(d, value.(int64))
data := fmt.Sprintf("dir:%s val:%d", d, value.(int64)) data := fmt.Sprintf("dir:%s val:%d", d, value.(int64))
if ok { if ok {
return NewLlrbIterator(index, data) return NewIterator(index, data, ts)
} }
return &iterator.Null{} return &iterator.Null{}
} }
// Horizon returns the ID recorded on the most recent entry in the log.
func (ts *TripleStore) Horizon() int64 {
	last := len(ts.log) - 1
	return ts.log[last].ID
}
func (ts *TripleStore) Size() int64 { func (ts *TripleStore) Size() int64 {
return ts.size - 1 // Don't count the sentinel return ts.size
} }
func (ts *TripleStore) DebugPrint() { func (ts *TripleStore) DebugPrint() {
for i, t := range ts.triples { for i, l := range ts.log {
if i == 0 { if i == 0 {
continue continue
} }
glog.V(2).Infof("%d: %s", i, t) glog.V(2).Infof("%d: %#v", i, l)
} }
} }
@ -256,7 +231,7 @@ func (ts *TripleStore) NameOf(id graph.Value) string {
} }
func (ts *TripleStore) TriplesAllIterator() graph.Iterator { func (ts *TripleStore) TriplesAllIterator() graph.Iterator {
return iterator.NewInt64(0, ts.Size()) return NewMemstoreQuadsAllIterator(ts)
} }
func (ts *TripleStore) FixedIterator() graph.FixedIterator { func (ts *TripleStore) FixedIterator() graph.FixedIterator {
@ -269,6 +244,7 @@ func (ts *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.
} }
func (ts *TripleStore) NodesAllIterator() graph.Iterator { func (ts *TripleStore) NodesAllIterator() graph.Iterator {
return NewMemstoreAllIterator(ts) return NewMemstoreNodesAllIterator(ts)
} }
func (ts *TripleStore) Close() {} func (ts *TripleStore) Close() {}

View file

@ -22,6 +22,7 @@ import (
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator" "github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
"github.com/google/cayley/writer"
) )
// This is a simple test graph. // This is a simple test graph.
@ -51,13 +52,14 @@ var simpleGraph = []quad.Quad{
{"G", "status", "cool", "status_graph"}, {"G", "status", "cool", "status_graph"},
} }
func makeTestStore(data []quad.Quad) (*TripleStore, []pair) { func makeTestStore(data []quad.Quad) (*TripleStore, graph.QuadWriter, []pair) {
seen := make(map[string]struct{}) seen := make(map[string]struct{})
ts := newTripleStore() ts := newTripleStore()
var ( var (
val int64 val int64
ind []pair ind []pair
) )
writer, _ := writer.NewSingleReplication(ts, nil)
for _, t := range data { for _, t := range data {
for _, qp := range []string{t.Subject, t.Predicate, t.Object, t.Label} { for _, qp := range []string{t.Subject, t.Predicate, t.Object, t.Label} {
if _, ok := seen[qp]; !ok && qp != "" { if _, ok := seen[qp]; !ok && qp != "" {
@ -66,9 +68,10 @@ func makeTestStore(data []quad.Quad) (*TripleStore, []pair) {
seen[qp] = struct{}{} seen[qp] = struct{}{}
} }
} }
ts.AddTriple(t)
writer.AddQuad(t)
} }
return ts, ind return ts, writer, ind
} }
type pair struct { type pair struct {
@ -77,7 +80,7 @@ type pair struct {
} }
func TestMemstore(t *testing.T) { func TestMemstore(t *testing.T) {
ts, index := makeTestStore(simpleGraph) ts, _, index := makeTestStore(simpleGraph)
if size := ts.Size(); size != int64(len(simpleGraph)) { if size := ts.Size(); size != int64(len(simpleGraph)) {
t.Errorf("Triple store has unexpected size, got:%d expected %d", size, len(simpleGraph)) t.Errorf("Triple store has unexpected size, got:%d expected %d", size, len(simpleGraph))
} }
@ -95,7 +98,7 @@ func TestMemstore(t *testing.T) {
} }
func TestIteratorsAndNextResultOrderA(t *testing.T) { func TestIteratorsAndNextResultOrderA(t *testing.T) {
ts, _ := makeTestStore(simpleGraph) ts, _, _ := makeTestStore(simpleGraph)
fixed := ts.FixedIterator() fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("C")) fixed.Add(ts.ValueOf("C"))
@ -144,7 +147,7 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) {
} }
func TestLinksToOptimization(t *testing.T) { func TestLinksToOptimization(t *testing.T) {
ts, _ := makeTestStore(simpleGraph) ts, _, _ := makeTestStore(simpleGraph)
fixed := ts.FixedIterator() fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("cool")) fixed.Add(ts.ValueOf("cool"))
@ -172,9 +175,9 @@ func TestLinksToOptimization(t *testing.T) {
} }
func TestRemoveTriple(t *testing.T) { func TestRemoveTriple(t *testing.T) {
ts, _ := makeTestStore(simpleGraph) ts, w, _ := makeTestStore(simpleGraph)
ts.RemoveTriple(quad.Quad{"E", "follows", "F", ""}) w.RemoveQuad(quad.Quad{"E", "follows", "F", ""})
fixed := ts.FixedIterator() fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("E")) fixed.Add(ts.ValueOf("E"))

View file

@ -45,17 +45,7 @@ type Iterator struct {
func NewIterator(qs *TripleStore, collection string, d quad.Direction, val graph.Value) *Iterator { func NewIterator(qs *TripleStore, collection string, d quad.Direction, val graph.Value) *Iterator {
name := qs.NameOf(val) name := qs.NameOf(val)
var constraint bson.M constraint := bson.M{d.String(): name}
switch d {
case quad.Subject:
constraint = bson.M{"Subject": name}
case quad.Predicate:
constraint = bson.M{"Predicate": name}
case quad.Object:
constraint = bson.M{"Object": name}
case quad.Label:
constraint = bson.M{"Label": name}
}
size, err := qs.db.C(collection).Find(constraint).Count() size, err := qs.db.C(collection).Find(constraint).Count()
if err != nil { if err != nil {
@ -141,9 +131,8 @@ func (it *Iterator) Clone() graph.Iterator {
func (it *Iterator) Next() bool { func (it *Iterator) Next() bool {
var result struct { var result struct {
Id string "_id" Id string "_id"
//Sub string "Sub" Added []int64 "Added"
//Pred string "Pred" Deleted []int64 "Deleted"
//Obj string "Obj"
} }
found := it.iter.Next(&result) found := it.iter.Next(&result)
if !found { if !found {
@ -153,6 +142,9 @@ func (it *Iterator) Next() bool {
} }
return false return false
} }
if it.collection == "quads" && len(result.Added) <= len(result.Deleted) {
return it.Next()
}
it.result = result.Id it.result = result.Id
return true return true
} }

View file

@ -18,7 +18,6 @@ import (
"crypto/sha1" "crypto/sha1"
"encoding/hex" "encoding/hex"
"hash" "hash"
"io"
"sync" "sync"
"gopkg.in/mgo.v2" "gopkg.in/mgo.v2"
@ -34,9 +33,6 @@ func init() {
graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph) graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph)
} }
// Guarantee we satisfy graph.Bulkloader.
var _ graph.BulkLoader = (*TripleStore)(nil)
const DefaultDBName = "cayley" const DefaultDBName = "cayley"
var ( var (
@ -64,19 +60,27 @@ func createNewMongoGraph(addr string, options graph.Options) error {
} }
db := conn.DB(dbName) db := conn.DB(dbName)
indexOpts := mgo.Index{ indexOpts := mgo.Index{
Key: []string{"Sub"}, Key: []string{"subject"},
Unique: false, Unique: false,
DropDups: false, DropDups: false,
Background: true, Background: true,
Sparse: true, Sparse: true,
} }
db.C("triples").EnsureIndex(indexOpts) db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Pred"} indexOpts.Key = []string{"predicate"}
db.C("triples").EnsureIndex(indexOpts) db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Obj"} indexOpts.Key = []string{"object"}
db.C("triples").EnsureIndex(indexOpts) db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Label"} indexOpts.Key = []string{"label"}
db.C("triples").EnsureIndex(indexOpts) db.C("quads").EnsureIndex(indexOpts)
logOpts := mgo.Index{
Key: []string{"LogID"},
Unique: true,
DropDups: false,
Background: true,
Sparse: true,
}
db.C("log").EnsureIndex(logOpts)
return nil return nil
} }
@ -97,7 +101,7 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro
return &qs, nil return &qs, nil
} }
func (qs *TripleStore) getIdForTriple(t quad.Quad) string { func (qs *TripleStore) getIdForQuad(t quad.Quad) string {
id := qs.convertStringToByteHash(t.Subject) id := qs.convertStringToByteHash(t.Subject)
id += qs.convertStringToByteHash(t.Predicate) id += qs.convertStringToByteHash(t.Predicate)
id += qs.convertStringToByteHash(t.Object) id += qs.convertStringToByteHash(t.Object)
@ -122,125 +126,157 @@ type MongoNode struct {
Size int "Size" Size int "Size"
} }
func (qs *TripleStore) updateNodeBy(node_name string, inc int) { type MongoLogEntry struct {
var size MongoNode LogID int64 "LogID"
node := qs.ValueOf(node_name) Action string "Action"
err := qs.db.C("nodes").FindId(node).One(&size) Key string "Key"
if err != nil { Timestamp int64
if err.Error() == "not found" {
// Not found. Okay.
size.Id = node.(string)
size.Name = node_name
size.Size = inc
} else {
glog.Errorf("Error: %v", err)
return
}
} else {
size.Id = node.(string)
size.Name = node_name
size.Size += inc
}
// Removing something...
if inc < 0 {
if size.Size <= 0 {
err := qs.db.C("nodes").RemoveId(node)
if err != nil {
glog.Errorf("Error: %v while removing node %s", err, node_name)
return
}
}
}
_, err2 := qs.db.C("nodes").UpsertId(node, size)
if err2 != nil {
glog.Errorf("Error: %v", err)
}
} }
func (qs *TripleStore) writeTriple(t quad.Quad) bool { func (qs *TripleStore) updateNodeBy(node_name string, inc int) error {
tripledoc := bson.M{ node := qs.ValueOf(node_name)
"_id": qs.getIdForTriple(t), doc := bson.M{
"Subject": t.Subject, "_id": node.(string),
"Predicate": t.Predicate, "Name": node_name,
"Object": t.Object,
"Label": t.Label,
} }
err := qs.db.C("triples").Insert(tripledoc) upsert := bson.M{
"$setOnInsert": doc,
"$inc": bson.M{
"Size": inc,
},
}
_, err := qs.db.C("nodes").UpsertId(node, upsert)
if err != nil { if err != nil {
// Among the reasons I hate MongoDB. "Errors don't happen! Right guys?" glog.Errorf("Error updating node: %v", err)
if err.(*mgo.LastError).Code == 11000 { }
return err
}
// updateQuad upserts the document for q in the "quads" collection, keyed by
// getIdForQuad(q). The quad itself is written only on insert; on every call
// id is pushed onto the "Added" array for graph.Add or the "Deleted" array
// for graph.Delete. Any error from the upsert is logged and returned.
func (qs *TripleStore) updateQuad(q quad.Quad, id int64, proc graph.Procedure) error {
	// setname stays empty for any procedure other than Add or Delete.
	var setname string
	switch proc {
	case graph.Add:
		setname = "Added"
	case graph.Delete:
		setname = "Deleted"
	}

	push := bson.M{setname: id}
	upsert := bson.M{
		"$setOnInsert": q,
		"$push":        push,
	}

	key := qs.getIdForQuad(q)
	if _, err := qs.db.C("quads").UpsertId(key, upsert); err != nil {
		glog.Errorf("Error: %v", err)
		return err
	}
	return nil
}
func (qs *TripleStore) checkValid(key string) bool {
var indexEntry struct {
Added []int64 "Added"
Deleted []int64 "Deleted"
}
err := qs.db.C("quads").FindId(key).One(&indexEntry)
if err == mgo.ErrNotFound {
return false return false
} }
glog.Errorf("Error: %v", err) if err != nil {
glog.Errorln("Other error checking valid quad: %s %v.", key, err)
return false
}
if len(indexEntry.Added) <= len(indexEntry.Deleted) {
return false return false
} }
return true return true
} }
func (qs *TripleStore) AddTriple(t quad.Quad) { func (qs *TripleStore) updateLog(d graph.Delta) error {
_ = qs.writeTriple(t) var action string
qs.updateNodeBy(t.Subject, 1) if d.Action == graph.Add {
qs.updateNodeBy(t.Predicate, 1) action = "Add"
qs.updateNodeBy(t.Object, 1) } else {
if t.Label != "" { action = "Delete"
qs.updateNodeBy(t.Label, 1)
} }
entry := MongoLogEntry{
LogID: d.ID,
Action: action,
Key: qs.getIdForQuad(d.Quad),
Timestamp: d.Timestamp.UnixNano(),
}
err := qs.db.C("log").Insert(entry)
if err != nil {
glog.Errorf("Error updating log: %v", err)
}
return err
} }
func (qs *TripleStore) AddTripleSet(in []quad.Quad) { func (qs *TripleStore) ApplyDeltas(in []graph.Delta) error {
qs.session.SetSafe(nil) qs.session.SetSafe(nil)
ids := make(map[string]int) ids := make(map[string]int)
for _, t := range in { // Pre-check the existence condition.
wrote := qs.writeTriple(t) for _, d := range in {
if wrote { key := qs.getIdForQuad(d.Quad)
ids[t.Subject]++ switch d.Action {
ids[t.Object]++ case graph.Add:
ids[t.Predicate]++ if qs.checkValid(key) {
if t.Label != "" { return graph.ErrQuadExists
ids[t.Label]++
} }
case graph.Delete:
if !qs.checkValid(key) {
return graph.ErrQuadNotExist
}
}
}
if glog.V(2) {
glog.Infoln("Existence verified. Proceeding.")
}
for _, d := range in {
err := qs.updateLog(d)
if err != nil {
return err
}
}
for _, d := range in {
err := qs.updateQuad(d.Quad, d.ID, d.Action)
if err != nil {
return err
}
var countdelta int
if d.Action == graph.Add {
countdelta = 1
} else {
countdelta = -1
}
ids[d.Quad.Subject] += countdelta
ids[d.Quad.Object] += countdelta
ids[d.Quad.Predicate] += countdelta
if d.Quad.Label != "" {
ids[d.Quad.Label] += countdelta
} }
} }
for k, v := range ids { for k, v := range ids {
qs.updateNodeBy(k, v) err := qs.updateNodeBy(k, v)
if err != nil {
return err
}
} }
qs.session.SetSafe(&mgo.Safe{}) qs.session.SetSafe(&mgo.Safe{})
} return nil
func (qs *TripleStore) RemoveTriple(t quad.Quad) {
err := qs.db.C("triples").RemoveId(qs.getIdForTriple(t))
if err == mgo.ErrNotFound {
return
} else if err != nil {
glog.Errorf("Error: %v while removing triple %v", err, t)
return
}
qs.updateNodeBy(t.Subject, -1)
qs.updateNodeBy(t.Predicate, -1)
qs.updateNodeBy(t.Object, -1)
if t.Label != "" {
qs.updateNodeBy(t.Label, -1)
}
} }
func (qs *TripleStore) Quad(val graph.Value) quad.Quad { func (qs *TripleStore) Quad(val graph.Value) quad.Quad {
var bsonDoc bson.M var q quad.Quad
err := qs.db.C("triples").FindId(val.(string)).One(&bsonDoc) err := qs.db.C("quads").FindId(val.(string)).One(&q)
if err != nil { if err != nil {
glog.Errorf("Error: Couldn't retrieve triple %s %v", val, err) glog.Errorf("Error: Couldn't retrieve quad %s %v", val, err)
}
return quad.Quad{
bsonDoc["Subject"].(string),
bsonDoc["Predicate"].(string),
bsonDoc["Object"].(string),
bsonDoc["Label"].(string),
} }
return q
} }
func (qs *TripleStore) TripleIterator(d quad.Direction, val graph.Value) graph.Iterator { func (qs *TripleStore) TripleIterator(d quad.Direction, val graph.Value) graph.Iterator {
return NewIterator(qs, "triples", d, val) return NewIterator(qs, "quads", d, val)
} }
func (qs *TripleStore) NodesAllIterator() graph.Iterator { func (qs *TripleStore) NodesAllIterator() graph.Iterator {
@ -248,7 +284,7 @@ func (qs *TripleStore) NodesAllIterator() graph.Iterator {
} }
func (qs *TripleStore) TriplesAllIterator() graph.Iterator { func (qs *TripleStore) TriplesAllIterator() graph.Iterator {
return NewAllIterator(qs, "triples") return NewAllIterator(qs, "quads")
} }
func (qs *TripleStore) ValueOf(s string) graph.Value { func (qs *TripleStore) ValueOf(s string) graph.Value {
@ -270,7 +306,8 @@ func (qs *TripleStore) NameOf(v graph.Value) string {
} }
func (qs *TripleStore) Size() int64 { func (qs *TripleStore) Size() int64 {
count, err := qs.db.C("triples").Count() // TODO(barakmich): Make size real; store it in the log, and retrieve it.
count, err := qs.db.C("quads").Count()
if err != nil { if err != nil {
glog.Errorf("Error: %v", err) glog.Errorf("Error: %v", err)
return 0 return 0
@ -278,6 +315,18 @@ func (qs *TripleStore) Size() int64 {
return int64(count) return int64(count)
} }
func (qs *TripleStore) Horizon() int64 {
var log MongoLogEntry
err := qs.db.C("log").Find(nil).Sort("-LogID").One(&log)
if err != nil {
if err == mgo.ErrNotFound {
return 0
}
glog.Errorf("Could not get Horizon from Mongo: %v", err)
}
return log.LogID
}
func compareStrings(a, b graph.Value) bool { func compareStrings(a, b graph.Value) bool {
return a.(string) == b.(string) return a.(string) == b.(string)
} }
@ -307,61 +356,4 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V
return val return val
} }
func (qs *TripleStore) BulkLoad(dec quad.Unmarshaler) error { // TODO(barakmich): Rewrite bulk loader. For now, iterating around blocks is the way we'll go about it.
if qs.Size() != 0 {
return graph.ErrCannotBulkLoad
}
qs.session.SetSafe(nil)
for {
q, err := dec.Unmarshal()
if err != nil {
if err != io.EOF {
return err
}
break
}
qs.writeTriple(q)
}
outputTo := bson.M{"replace": "nodes", "sharded": true}
glog.Infoln("Mapreducing")
job := mgo.MapReduce{
Map: `function() {
var len = this["_id"].length
var s_key = this["_id"].slice(0, len / 4)
var p_key = this["_id"].slice(len / 4, 2 * len / 4)
var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4)
var c_key = this["_id"].slice(3 * len / 4)
emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1})
emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1})
emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1})
if (this.Label != "") {
emit(c_key, {"_id": c_key, "Name" : this.Label, "Size" : 1})
}
}
`,
Reduce: `
function(key, value_list) {
out = {"_id": key, "Name": value_list[0].Name}
count = 0
for (var i = 0; i < value_list.length; i++) {
count = count + value_list[i].Size
}
out["Size"] = count
return out
}
`,
Out: outputTo,
}
qs.db.C("triples").Find(nil).MapReduce(&job, nil)
glog.Infoln("Fixing")
qs.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) {
db.nodes.update({"_id": result._id}, result.value)
}) }`}, {"args", bson.D{}}}, nil)
qs.session.SetSafe(&mgo.Safe{})
return nil
}

101
graph/quadwriter.go Normal file
View file

@ -0,0 +1,101 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package graph
// Defines the interface for consistent replication of a graph instance.
//
// Separate from the backend, this dictates how individual triples get
// identified and replicated consistently across (potentially) multiple
// instances. The simplest case is to keep an append-only log of triple
// changes.
import (
"errors"
"time"
"github.com/google/cayley/quad"
)
// Procedure identifies the kind of change a Delta carries.
type Procedure byte
// The different types of actions a transaction can do.
const (
// Add marks a Delta whose quad is to be added.
Add Procedure = iota
// Delete marks a Delta whose quad is to be removed.
Delete
)
// Delta describes one change to a store: a quad together with the action to
// perform on it. A slice of Deltas is what TripleStore.ApplyDeltas consumes.
type Delta struct {
// ID identifies this change; it is assigned by the writer that built the Delta.
ID int64
// Quad is the quad being added or deleted.
Quad quad.Quad
// Action says whether Quad is added (Add) or removed (Delete).
Action Procedure
// Timestamp is set by the writer when it builds the Delta.
Timestamp time.Time
}
// Handle pairs a store with the writer used to modify it, so both can be
// passed around and closed together.
type Handle struct {
// QuadStore is the underlying store.
QuadStore TripleStore
// QuadWriter is the writer through which changes are submitted.
QuadWriter QuadWriter
}
// Close closes the store and then the writer. The error returned by the
// writer's Close is discarded.
func (h *Handle) Close() {
h.QuadStore.Close()
h.QuadWriter.Close()
}
// Sentinel errors a store can return from ApplyDeltas when a delta conflicts
// with the current contents.
var (
// ErrQuadExists reports an Add of a quad that is already present.
ErrQuadExists = errors.New("Quad exists")
// ErrQuadNotExist reports a Delete of a quad that is not present.
ErrQuadNotExist = errors.New("Quad doesn't exist")
)
// QuadWriter is the write side of a graph: implementations turn quad-level
// requests into changes applied to a TripleStore. Every method reports
// failure through its error result.
type QuadWriter interface {
// AddQuad adds a single quad to the store.
AddQuad(quad.Quad) error
// AddQuadSet adds a set of quads to the store, atomically if possible.
AddQuadSet([]quad.Quad) error
// RemoveQuad removes a quad matching the given one from the database,
// if it exists.
// NOTE(review): the original comment said "does nothing otherwise", but
// a backend may instead report ErrQuadNotExist for a missing quad —
// confirm the intended contract.
RemoveQuad(quad.Quad) error
// Close cleans up replication and closes the writing aspect of the database.
Close() error
}
// NewQuadWriterFunc is the constructor signature a writer implementation
// registers: given a store and options it returns a ready QuadWriter.
type NewQuadWriterFunc func(TripleStore, Options) (QuadWriter, error)
// writerRegistry maps a writer name to its constructor. It is filled by
// RegisterWriter and read by NewQuadWriter and WriterMethods.
var writerRegistry = make(map[string]NewQuadWriterFunc)
// RegisterWriter records newFunc as the constructor for writers called name.
// Registering the same name twice is a programming error and panics.
func RegisterWriter(name string, newFunc NewQuadWriterFunc) {
	_, taken := writerRegistry[name]
	if taken {
		panic("already registered TripleWriter " + name)
	}
	writerRegistry[name] = newFunc
}
// NewQuadWriter builds a writer of the registered kind name on top of ts,
// passing opts through to the registered constructor. It returns an error
// when no constructor has been registered under name.
func NewQuadWriter(name string, ts TripleStore, opts Options) (QuadWriter, error) {
	if construct, ok := writerRegistry[name]; ok {
		return construct(ts, opts)
	}
	return nil, errors.New("replication: name '" + name + "' is not registered")
}
// WriterMethods lists the names of all registered writers. The order follows
// map iteration and is therefore unspecified.
func WriterMethods() []string {
	names := make([]string, len(writerRegistry))
	next := 0
	for name := range writerRegistry {
		names[next] = name
		next++
	}
	return names
}

View file

@ -42,15 +42,9 @@ import (
type Value interface{} type Value interface{}
type TripleStore interface { type TripleStore interface {
// Add a triple to the store. // The only way in is through building a transaction, which
AddTriple(quad.Quad) // is done by a replication strategy.
ApplyDeltas([]Delta) error
// Add a set of triples to the store, atomically if possible.
AddTripleSet([]quad.Quad)
// Removes a triple matching the given one from the database,
// if it exists. Does nothing otherwise.
RemoveTriple(quad.Quad)
// Given an opaque token, returns the triple for that token from the store. // Given an opaque token, returns the triple for that token from the store.
Quad(Value) quad.Quad Quad(Value) quad.Quad
@ -75,6 +69,9 @@ type TripleStore interface {
// Returns the number of triples currently stored. // Returns the number of triples currently stored.
Size() int64 Size() int64
// The last replicated transaction ID that this triplestore has verified.
Horizon() int64
// Creates a fixed iterator which can compare Values // Creates a fixed iterator which can compare Values
FixedIterator() FixedIterator FixedIterator() FixedIterator

View file

@ -107,7 +107,7 @@ func (h *TemplateRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
type Api struct { type Api struct {
config *config.Config config *config.Config
ts graph.TripleStore handle *graph.Handle
} }
func (api *Api) ApiV1(r *httprouter.Router) { func (api *Api) ApiV1(r *httprouter.Router) {
@ -119,7 +119,7 @@ func (api *Api) ApiV1(r *httprouter.Router) {
r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete)) r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete))
} }
func SetupRoutes(ts graph.TripleStore, cfg *config.Config) { func SetupRoutes(handle *graph.Handle, cfg *config.Config) {
r := httprouter.New() r := httprouter.New()
assets := findAssetsPath() assets := findAssetsPath()
if glog.V(2) { if glog.V(2) {
@ -129,7 +129,7 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) {
templates.ParseGlob(fmt.Sprint(assets, "/templates/*.html")) templates.ParseGlob(fmt.Sprint(assets, "/templates/*.html"))
root := &TemplateRequestHandler{templates: templates} root := &TemplateRequestHandler{templates: templates}
docs := &DocRequestHandler{assets: assets} docs := &DocRequestHandler{assets: assets}
api := &Api{config: cfg, ts: ts} api := &Api{config: cfg, handle: handle}
api.ApiV1(r) api.ApiV1(r)
//m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true})) //m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true}))
@ -141,8 +141,8 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) {
http.Handle("/", r) http.Handle("/", r)
} }
func Serve(ts graph.TripleStore, cfg *config.Config) { func Serve(handle *graph.Handle, cfg *config.Config) {
SetupRoutes(ts, cfg) SetupRoutes(handle, cfg)
glog.Infof("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort) glog.Infof("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort)
fmt.Printf("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort) fmt.Printf("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort)
err := http.ListenAndServe(fmt.Sprintf("%s:%s", cfg.ListenHost, cfg.ListenPort), nil) err := http.ListenAndServe(fmt.Sprintf("%s:%s", cfg.ListenHost, cfg.ListenPort), nil)

View file

@ -71,9 +71,9 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http
var ses query.HttpSession var ses query.HttpSession
switch params.ByName("query_lang") { switch params.ByName("query_lang") {
case "gremlin": case "gremlin":
ses = gremlin.NewSession(api.ts, api.config.Timeout, false) ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false)
case "mql": case "mql":
ses = mql.NewSession(api.ts) ses = mql.NewSession(api.handle.QuadStore)
default: default:
return FormatJson400(w, "Need a query language.") return FormatJson400(w, "Need a query language.")
} }
@ -110,18 +110,15 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http
ses = nil ses = nil
return FormatJsonError(w, 500, "Incomplete data?") return FormatJsonError(w, 500, "Incomplete data?")
} }
http.Error(w, "", http.StatusNotFound)
ses = nil
return http.StatusNotFound
} }
func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int {
var ses query.HttpSession var ses query.HttpSession
switch params.ByName("query_lang") { switch params.ByName("query_lang") {
case "gremlin": case "gremlin":
ses = gremlin.NewSession(api.ts, api.config.Timeout, false) ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false)
case "mql": case "mql":
ses = mql.NewSession(api.ts) ses = mql.NewSession(api.handle.QuadStore)
default: default:
return FormatJson400(w, "Need a query language.") return FormatJson400(w, "Need a query language.")
} }
@ -146,6 +143,4 @@ func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params http
default: default:
return FormatJsonError(w, 500, "Incomplete data?") return FormatJsonError(w, 500, "Incomplete data?")
} }
http.Error(w, "", http.StatusNotFound)
return http.StatusNotFound
} }

View file

@ -55,7 +55,7 @@ func (api *Api) ServeV1Write(w http.ResponseWriter, r *http.Request, _ httproute
if terr != nil { if terr != nil {
return FormatJson400(w, terr) return FormatJson400(w, terr)
} }
api.ts.AddTripleSet(tripleList) api.handle.QuadWriter.AddQuadSet(tripleList)
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList)) fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList))
return 200 return 200
} }
@ -97,11 +97,11 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
block = append(block, t) block = append(block, t)
n++ n++
if len(block) == cap(block) { if len(block) == cap(block) {
api.ts.AddTripleSet(block) api.handle.QuadWriter.AddQuadSet(block)
block = block[:0] block = block[:0]
} }
} }
api.ts.AddTripleSet(block) api.handle.QuadWriter.AddQuadSet(block)
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n) fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n)
@ -122,7 +122,7 @@ func (api *Api) ServeV1Delete(w http.ResponseWriter, r *http.Request, params htt
} }
count := 0 count := 0
for _, triple := range tripleList { for _, triple := range tripleList {
api.ts.RemoveTriple(triple) api.handle.QuadWriter.RemoveQuad(triple)
count++ count++
} }
fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count) fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count)

View file

@ -23,6 +23,7 @@ import (
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
_ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/writer"
) )
// This is a simple test graph. // This is a simple test graph.
@ -54,8 +55,9 @@ var simpleGraph = []quad.Quad{
func makeTestSession(data []quad.Quad) *Session { func makeTestSession(data []quad.Quad) *Session {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, t := range data { for _, t := range data {
ts.AddTriple(t) w.AddQuad(t)
} }
return NewSession(ts, -1, false) return NewSession(ts, -1, false)
} }

View file

@ -22,6 +22,7 @@ import (
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
_ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/memstore"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
_ "github.com/google/cayley/writer"
) )
// This is a simple test graph. // This is a simple test graph.
@ -53,8 +54,9 @@ var simpleGraph = []quad.Quad{
func makeTestSession(data []quad.Quad) *Session { func makeTestSession(data []quad.Quad) *Session {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, t := range data { for _, t := range data {
ts.AddTriple(t) w.AddQuad(t)
} }
return NewSession(ts) return NewSession(ts)
} }

View file

@ -21,6 +21,7 @@ import (
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
_ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/writer"
) )
func TestBadParse(t *testing.T) { func TestBadParse(t *testing.T) {
@ -55,13 +56,14 @@ var testQueries = []struct {
func TestMemstoreBackedSexp(t *testing.T) { func TestMemstoreBackedSexp(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
it := BuildIteratorTreeForQuery(ts, "()") it := BuildIteratorTreeForQuery(ts, "()")
if it.Type() != graph.Null { if it.Type() != graph.Null {
t.Errorf(`Incorrect type for empty query, got:%q expect: "null"`, it.Type()) t.Errorf(`Incorrect type for empty query, got:%q expect: "null"`, it.Type())
} }
for _, test := range testQueries { for _, test := range testQueries {
if test.add.IsValid() { if test.add.IsValid() {
ts.AddTriple(test.add) w.AddQuad(test.add)
} }
it := BuildIteratorTreeForQuery(ts, test.query) it := BuildIteratorTreeForQuery(ts, test.query)
if it.Type() != test.typ { if it.Type() != test.typ {
@ -79,8 +81,9 @@ func TestMemstoreBackedSexp(t *testing.T) {
func TestTreeConstraintParse(t *testing.T) { func TestTreeConstraintParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
ts.AddTriple(quad.Quad{"i", "like", "food", ""}) w, _ := graph.NewQuadWriter("single", ts, nil)
ts.AddTriple(quad.Quad{"food", "is", "good", ""}) w.AddQuad(quad.Quad{"i", "like", "food", ""})
w.AddQuad(quad.Quad{"food", "is", "good", ""})
query := "(\"i\"\n" + query := "(\"i\"\n" +
"(:like\n" + "(:like\n" +
"($a (:is :good))))" "($a (:is :good))))"
@ -99,8 +102,9 @@ func TestTreeConstraintParse(t *testing.T) {
func TestTreeConstraintTagParse(t *testing.T) { func TestTreeConstraintTagParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
ts.AddTriple(quad.Quad{"i", "like", "food", ""}) w, _ := graph.NewQuadWriter("single", ts, nil)
ts.AddTriple(quad.Quad{"food", "is", "good", ""}) w.AddQuad(quad.Quad{"i", "like", "food", ""})
w.AddQuad(quad.Quad{"food", "is", "good", ""})
query := "(\"i\"\n" + query := "(\"i\"\n" +
"(:like\n" + "(:like\n" +
"($a (:is :good))))" "($a (:is :good))))"
@ -118,12 +122,13 @@ func TestTreeConstraintTagParse(t *testing.T) {
func TestMultipleConstraintParse(t *testing.T) { func TestMultipleConstraintParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil) ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, tv := range []quad.Quad{ for _, tv := range []quad.Quad{
{"i", "like", "food", ""}, {"i", "like", "food", ""},
{"i", "like", "beer", ""}, {"i", "like", "beer", ""},
{"you", "like", "beer", ""}, {"you", "like", "beer", ""},
} { } {
ts.AddTriple(tv) w.AddQuad(tv)
} }
query := `( query := `(
$a $a

91
writer/single.go Normal file
View file

@ -0,0 +1,91 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package writer
import (
"sync"
"time"
"github.com/google/cayley/graph"
"github.com/google/cayley/quad"
)
// init registers NewSingleReplication with the graph package under the
// name "single", so importing this package makes that writer available.
func init() {
graph.RegisterWriter("single", NewSingleReplication)
}
// Single is a writer that numbers changes from a local counter and applies
// them directly to one store.
type Single struct {
// nextID is the ID the next delta will receive; access goes through mut.
nextID int64
// ts is the store that receives every delta.
ts graph.TripleStore
// mut guards nextID.
mut sync.Mutex
}
// NewSingleReplication builds a Single writer over ts. Numbering resumes
// one past the store's Horizon; when the horizon is zero or negative the
// first ID handed out is 1. opts is not consulted and the returned error
// is always nil.
func NewSingleReplication(ts graph.TripleStore, opts graph.Options) (graph.QuadWriter, error) {
	first := int64(1)
	if h := ts.Horizon(); h > 0 {
		first = h + 1
	}
	return &Single{nextID: first, ts: ts}, nil
}
// AcquireNextID hands out the current counter value and advances the
// counter by one, all while holding the writer's mutex.
func (s *Single) AcquireNextID() int64 {
	s.mut.Lock()
	defer s.mut.Unlock()
	s.nextID++
	return s.nextID - 1
}
// AddQuad submits q to the store as a one-element batch: a single Add delta
// stamped with a fresh ID and the current time. The store's error, if any,
// is returned unchanged.
func (s *Single) AddQuad(q quad.Quad) error {
	change := graph.Delta{
		ID:        s.AcquireNextID(),
		Quad:      q,
		Action:    graph.Add,
		Timestamp: time.Now(),
	}
	return s.ts.ApplyDeltas([]graph.Delta{change})
}
// AddQuadSet wraps every quad in set in an Add delta, each with its own
// freshly acquired ID and timestamp, and hands the whole batch to the store
// in a single ApplyDeltas call.
//
// It returns the error reported by the store, so callers can distinguish a
// rejected batch from a successful one, matching AddQuad and RemoveQuad.
func (s *Single) AddQuadSet(set []quad.Quad) error {
	deltas := make([]graph.Delta, len(set))
	for i, q := range set {
		deltas[i] = graph.Delta{
			ID:        s.AcquireNextID(),
			Quad:      q,
			Action:    graph.Add,
			Timestamp: time.Now(),
		}
	}
	// Propagate the store's result instead of reporting unconditional success.
	return s.ts.ApplyDeltas(deltas)
}
// RemoveQuad submits q to the store as a one-element batch: a single Delete
// delta stamped with a fresh ID and the current time. The store's error, if
// any, is returned unchanged.
func (s *Single) RemoveQuad(q quad.Quad) error {
	change := graph.Delta{
		ID:        s.AcquireNextID(),
		Quad:      q,
		Action:    graph.Delete,
		Timestamp: time.Now(),
	}
	return s.ts.ApplyDeltas([]graph.Delta{change})
}
// Close satisfies graph.QuadWriter. This writer holds no resources of its
// own, so it always returns nil; the store itself is not closed here.
func (s *Single) Close() error {
// Nothing to clean up locally.
return nil
}