Merge pull request #113 from barakmich/log_database

Convert Cayley indexing to an append-only log
This commit is contained in:
Barak Michener 2014-08-16 03:26:49 -04:00
commit e1e95b9686
27 changed files with 2326 additions and 635 deletions

View file

@ -8,6 +8,7 @@ go:
install:
- go get github.com/badgerodon/peg
- go get github.com/barakmich/glog
- go get github.com/cznic/mathutil
- go get github.com/julienschmidt/httprouter
- go get github.com/petar/GoLLRB/llrb
- go get github.com/peterh/liner

View file

@ -44,6 +44,9 @@ import (
_ "github.com/google/cayley/graph/leveldb"
_ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/graph/mongo"
// Load writer registry
_ "github.com/google/cayley/writer"
)
var (
@ -105,8 +108,8 @@ func main() {
}
var (
ts graph.TripleStore
err error
handle *graph.Handle
err error
)
switch cmd {
case "version":
@ -123,60 +126,60 @@ func main() {
break
}
if *tripleFile != "" {
ts, err = db.Open(cfg)
handle, err = db.Open(cfg)
if err != nil {
break
}
err = load(ts, cfg, *tripleFile, *tripleType)
err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType)
if err != nil {
break
}
ts.Close()
handle.Close()
}
case "load":
ts, err = db.Open(cfg)
handle, err = db.Open(cfg)
if err != nil {
break
}
err = load(ts, cfg, *tripleFile, *tripleType)
err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType)
if err != nil {
break
}
ts.Close()
handle.Close()
case "repl":
ts, err = db.Open(cfg)
handle, err = db.Open(cfg)
if err != nil {
break
}
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", *tripleType)
err = load(handle.QuadWriter, cfg, "", *tripleType)
if err != nil {
break
}
}
err = db.Repl(ts, *queryLanguage, cfg)
err = db.Repl(handle, *queryLanguage, cfg)
ts.Close()
handle.Close()
case "http":
ts, err = db.Open(cfg)
handle, err = db.Open(cfg)
if err != nil {
break
}
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", *tripleType)
err = load(handle.QuadWriter, cfg, "", *tripleType)
if err != nil {
break
}
}
http.Serve(ts, cfg)
http.Serve(handle, cfg)
ts.Close()
handle.Close()
default:
fmt.Println("No command", cmd)
@ -187,7 +190,29 @@ func main() {
}
}
func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error {
func load(qw graph.QuadWriter, cfg *config.Config, path, typ string) error {
return decompressAndLoad(qw, cfg, path, typ, db.Load)
}
func removeAll(qw graph.QuadWriter, cfg *config.Config, path, typ string) error {
return decompressAndLoad(qw, cfg, path, typ, remove)
}
func remove(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error {
for {
t, err := dec.Unmarshal()
if err != nil {
if err == io.EOF {
break
}
return err
}
qw.RemoveQuad(t)
}
return nil
}
func decompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string, loadFn func(graph.QuadWriter, *config.Config, quad.Unmarshaler) error) error {
var r io.Reader
if path == "" {
@ -233,7 +258,7 @@ func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error {
return fmt.Errorf("unknown quad format %q", typ)
}
return db.Load(ts, cfg, dec)
return db.Load(qw, cfg, dec)
}
const (

View file

@ -297,26 +297,45 @@ var m2_actors = movie2.Save("name","movie2").Follow(filmToActor)
`
var (
once sync.Once
cfg = &config.Config{
DatabasePath: "30kmoviedata.nq.gz",
DatabaseType: "memstore",
Timeout: 300 * time.Second,
create sync.Once
deleteAndRecreate sync.Once
cfg = &config.Config{
DatabasePath: "30kmoviedata.nq.gz",
DatabaseType: "memstore",
ReplicationType: "single",
Timeout: 300 * time.Second,
}
ts graph.TripleStore
handle *graph.Handle
)
func prepare(t testing.TB) {
var err error
once.Do(func() {
ts, err = db.Open(cfg)
create.Do(func() {
handle, err = db.Open(cfg)
if err != nil {
t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err)
}
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", "cquad")
err = load(handle.QuadWriter, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
}
}
})
}
func deletePrepare(t testing.TB) {
var err error
deleteAndRecreate.Do(func() {
prepare(t)
if !graph.IsPersistent(cfg.DatabaseType) {
err = removeAll(handle.QuadWriter, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to remove %q: %v", cfg.DatabasePath, err)
}
err = load(handle.QuadWriter, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
}
@ -326,11 +345,23 @@ func prepare(t testing.TB) {
func TestQueries(t *testing.T) {
prepare(t)
checkQueries(t)
}
func TestDeletedAndRecreatedQueries(t *testing.T) {
if testing.Short() {
t.Skip()
}
deletePrepare(t)
checkQueries(t)
}
func checkQueries(t *testing.T) {
for _, test := range benchmarkQueries {
if testing.Short() && test.long {
continue
}
ses := gremlin.NewSession(ts, cfg.Timeout, true)
ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true)
_, err := ses.InputParses(test.query)
if err != nil {
t.Fatalf("Failed to parse benchmark gremlin %s: %v", test.message, err)
@ -382,7 +413,7 @@ func runBench(n int, b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
c := make(chan interface{}, 5)
ses := gremlin.NewSession(ts, cfg.Timeout, true)
ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true)
// Do the parsing we know works.
ses.InputParses(benchmarkQueries[n].query)
b.StartTimer()

View file

@ -26,25 +26,29 @@ import (
)
type Config struct {
DatabaseType string
DatabasePath string
DatabaseOptions map[string]interface{}
ListenHost string
ListenPort string
ReadOnly bool
Timeout time.Duration
LoadSize int
DatabaseType string
DatabasePath string
DatabaseOptions map[string]interface{}
ReplicationType string
ReplicationOptions map[string]interface{}
ListenHost string
ListenPort string
ReadOnly bool
Timeout time.Duration
LoadSize int
}
type config struct {
DatabaseType string `json:"database"`
DatabasePath string `json:"db_path"`
DatabaseOptions map[string]interface{} `json:"db_options"`
ListenHost string `json:"listen_host"`
ListenPort string `json:"listen_port"`
ReadOnly bool `json:"read_only"`
Timeout duration `json:"timeout"`
LoadSize int `json:"load_size"`
DatabaseType string `json:"database"`
DatabasePath string `json:"db_path"`
DatabaseOptions map[string]interface{} `json:"db_options"`
ReplicationType string `json:"replication"`
ReplicationOptions map[string]interface{} `json:"replication_options"`
ListenHost string `json:"listen_host"`
ListenPort string `json:"listen_port"`
ReadOnly bool `json:"read_only"`
Timeout duration `json:"timeout"`
LoadSize int `json:"load_size"`
}
func (c *Config) UnmarshalJSON(data []byte) error {
@ -54,28 +58,32 @@ func (c *Config) UnmarshalJSON(data []byte) error {
return err
}
*c = Config{
DatabaseType: t.DatabaseType,
DatabasePath: t.DatabasePath,
DatabaseOptions: t.DatabaseOptions,
ListenHost: t.ListenHost,
ListenPort: t.ListenPort,
ReadOnly: t.ReadOnly,
Timeout: time.Duration(t.Timeout),
LoadSize: t.LoadSize,
DatabaseType: t.DatabaseType,
DatabasePath: t.DatabasePath,
DatabaseOptions: t.DatabaseOptions,
ReplicationType: t.ReplicationType,
ReplicationOptions: t.ReplicationOptions,
ListenHost: t.ListenHost,
ListenPort: t.ListenPort,
ReadOnly: t.ReadOnly,
Timeout: time.Duration(t.Timeout),
LoadSize: t.LoadSize,
}
return nil
}
func (c *Config) MarshalJSON() ([]byte, error) {
return json.Marshal(config{
DatabaseType: c.DatabaseType,
DatabasePath: c.DatabasePath,
DatabaseOptions: c.DatabaseOptions,
ListenHost: c.ListenHost,
ListenPort: c.ListenPort,
ReadOnly: c.ReadOnly,
Timeout: duration(c.Timeout),
LoadSize: c.LoadSize,
DatabaseType: c.DatabaseType,
DatabasePath: c.DatabasePath,
DatabaseOptions: c.DatabaseOptions,
ReplicationType: c.ReplicationType,
ReplicationOptions: c.ReplicationOptions,
ListenHost: c.ListenHost,
ListenPort: c.ListenPort,
ReadOnly: c.ReadOnly,
Timeout: duration(c.Timeout),
LoadSize: c.LoadSize,
})
}
@ -115,13 +123,14 @@ func (d *duration) MarshalJSON() ([]byte, error) {
}
var (
databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
databaseBackend = flag.String("db", "memstore", "Database Backend.")
host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).")
loadSize = flag.Int("load_size", 10000, "Size of triplesets to load")
port = flag.String("port", "64210", "Port to listen on.")
readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.")
timeout = flag.Duration("timeout", 30*time.Second, "Elapsed time until an individual query times out.")
databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
databaseBackend = flag.String("db", "memstore", "Database Backend.")
replicationBackend = flag.String("replication", "single", "Replication method.")
host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).")
loadSize = flag.Int("load_size", 10000, "Size of triplesets to load")
port = flag.String("port", "64210", "Port to listen on.")
readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.")
timeout = flag.Duration("timeout", 30*time.Second, "Elapsed time until an individual query times out.")
)
func ParseConfigFromFile(filename string) *Config {
@ -175,6 +184,10 @@ func ParseConfigFromFlagsAndFile(fileFlag string) *Config {
config.DatabaseType = *databaseBackend
}
if config.ReplicationType == "" {
config.ReplicationType = *replicationBackend
}
if config.ListenHost == "" {
config.ListenHost = *host
}

View file

@ -36,8 +36,20 @@ func Init(cfg *config.Config) error {
return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
}
func Open(cfg *config.Config) (graph.TripleStore, error) {
glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath)
func Open(cfg *config.Config) (*graph.Handle, error) {
qs, err := OpenQuadStore(cfg)
if err != nil {
return nil, err
}
qw, err := OpenQuadWriter(qs, cfg)
if err != nil {
return nil, err
}
return &graph.Handle{QuadStore: qs, QuadWriter: qw}, nil
}
func OpenQuadStore(cfg *config.Config) (graph.TripleStore, error) {
glog.Infof("Opening quad store %q at %s", cfg.DatabaseType, cfg.DatabasePath)
ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
if err != nil {
return nil, err
@ -46,19 +58,17 @@ func Open(cfg *config.Config) (graph.TripleStore, error) {
return ts, nil
}
func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error {
bulker, canBulk := ts.(graph.BulkLoader)
if canBulk {
switch err := bulker.BulkLoad(dec); err {
case nil:
return nil
case graph.ErrCannotBulkLoad:
// Try individual loading.
default:
return err
}
func OpenQuadWriter(qs graph.TripleStore, cfg *config.Config) (graph.QuadWriter, error) {
glog.Infof("Opening replication method %q", cfg.ReplicationType)
w, err := graph.NewQuadWriter(cfg.ReplicationType, qs, cfg.ReplicationOptions)
if err != nil {
return nil, err
}
return w, nil
}
func Load(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error {
block := make([]quad.Quad, 0, cfg.LoadSize)
for {
t, err := dec.Unmarshal()
@ -70,11 +80,11 @@ func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error
}
block = append(block, t)
if len(block) == cap(block) {
ts.AddTripleSet(block)
qw.AddQuadSet(block)
block = block[:0]
}
}
ts.AddTripleSet(block)
qw.AddQuadSet(block)
return nil
}

View file

@ -70,17 +70,17 @@ const (
history = ".cayley_history"
)
func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error {
func Repl(h *graph.Handle, queryLanguage string, cfg *config.Config) error {
var ses query.Session
switch queryLanguage {
case "sexp":
ses = sexp.NewSession(ts)
ses = sexp.NewSession(h.QuadStore)
case "mql":
ses = mql.NewSession(ts)
ses = mql.NewSession(h.QuadStore)
case "gremlin":
fallthrough
default:
ses = gremlin.NewSession(ts, cfg.Timeout, true)
ses = gremlin.NewSession(h.QuadStore, cfg.Timeout, true)
}
term, err := terminal(history)
@ -124,25 +124,25 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
continue
case strings.HasPrefix(line, ":a"):
triple, err := cquads.Parse(line[3:])
if !triple.IsValid() {
quad, err := cquads.Parse(line[3:])
if !quad.IsValid() {
if err != nil {
fmt.Printf("not a valid triple: %v\n", err)
fmt.Printf("not a valid quad: %v\n", err)
}
continue
}
ts.AddTriple(triple)
h.QuadWriter.AddQuad(quad)
continue
case strings.HasPrefix(line, ":d"):
triple, err := cquads.Parse(line[3:])
if !triple.IsValid() {
quad, err := cquads.Parse(line[3:])
if !quad.IsValid() {
if err != nil {
fmt.Printf("not a valid triple: %v\n", err)
fmt.Printf("not a valid quad: %v\n", err)
}
continue
}
ts.RemoveTriple(triple)
h.QuadWriter.RemoveQuad(quad)
continue
}
}

View file

@ -36,9 +36,7 @@ func (qs *store) ValueOf(s string) graph.Value {
return nil
}
func (qs *store) AddTriple(quad.Quad) {}
func (qs *store) AddTripleSet([]quad.Quad) {}
func (qs *store) ApplyDeltas([]graph.Delta) error { return nil }
func (qs *store) Quad(graph.Value) quad.Quad { return quad.Quad{} }
@ -60,6 +58,8 @@ func (qs *store) NameOf(v graph.Value) string {
func (qs *store) Size() int64 { return 0 }
func (qs *store) Horizon() int64 { return 0 }
func (qs *store) DebugPrint() {}
func (qs *store) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) {

View file

@ -16,9 +16,11 @@ package leveldb
import (
"bytes"
"encoding/json"
"fmt"
"strings"
"github.com/barakmich/glog"
ldbit "github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
@ -65,10 +67,9 @@ func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleS
ok := it.iter.Seek(it.nextPrefix)
if !ok {
// FIXME(kortschak) What are the semantics here? Is this iterator usable?
// If not, we should return nil *Iterator and an error.
it.open = false
it.iter.Release()
glog.Error("Opening LevelDB iterator couldn't seek to location ", it.nextPrefix)
}
return &it
@ -117,6 +118,12 @@ func (it *Iterator) Close() {
}
}
func (it *Iterator) isLiveValue(val []byte) bool {
var entry IndexEntry
json.Unmarshal(val, &entry)
return len(entry.History)%2 != 0
}
func (it *Iterator) Next() bool {
if it.iter == nil {
it.result = nil
@ -132,6 +139,9 @@ func (it *Iterator) Next() bool {
return false
}
if bytes.HasPrefix(it.iter.Key(), it.nextPrefix) {
if !it.isLiveValue(it.iter.Value()) {
return it.Next()
}
out := make([]byte, len(it.iter.Key()))
copy(out, it.iter.Key())
it.result = Token(out)
@ -173,7 +183,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object:
return 2*hashSize + 2
case quad.Label:
return -1
return 3*hashSize + 2
}
}
if bytes.Equal(prefix, []byte("po")) {
@ -185,7 +195,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object:
return hashSize + 2
case quad.Label:
return -1
return hashSize + 2
}
}
if bytes.Equal(prefix, []byte("os")) {
@ -197,7 +207,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int {
case quad.Object:
return 2
case quad.Label:
return -1
return 3*hashSize + 2
}
}
if bytes.Equal(prefix, []byte("cp")) {
@ -221,16 +231,17 @@ func (it *Iterator) Contains(v graph.Value) bool {
return false
}
offset := PositionOf(val[0:2], it.dir, it.qs)
if offset != -1 {
if bytes.HasPrefix(val[offset:], it.checkId[1:]) {
return true
}
} else {
nameForDir := it.qs.Quad(v).Get(it.dir)
hashForDir := it.qs.ValueOf(nameForDir).(Token)
if bytes.Equal(hashForDir, it.checkId) {
return true
}
if bytes.HasPrefix(val[offset:], it.checkId[1:]) {
// You may ask, why don't we check to see if it's a valid (not deleted) triple
// again?
//
// We've already done that -- in order to get the graph.Value token in the
// first place, we had to have done the check already; it came from a Next().
//
// However, if it ever starts coming from somewhere else, it'll be more
// efficient to change the interface of the graph.Value for LevelDB to a
// struct with a flag for isValid, to save another random read.
return true
}
return false
}

View file

@ -24,6 +24,7 @@ import (
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
"github.com/google/cayley/writer"
)
func makeTripleSet() []quad.Quad {
@ -135,7 +136,8 @@ func TestLoadDatabase(t *testing.T) {
t.Error("Failed to create leveldb TripleStore.")
}
qs.AddTriple(quad.Quad{"Something", "points_to", "Something Else", "context"})
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuad(quad.Quad{"Something", "points_to", "Something Else", "context"})
for _, pq := range []string{"Something", "points_to", "Something Else", "context"} {
if got := qs.NameOf(qs.ValueOf(pq)); got != pq {
t.Errorf("Failed to roundtrip %q, got:%q expect:%q", pq, got, pq)
@ -154,13 +156,14 @@ func TestLoadDatabase(t *testing.T) {
if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.")
}
w, _ = writer.NewSingleReplication(qs, nil)
ts2, didConvert := qs.(*TripleStore)
if !didConvert {
t.Errorf("Could not convert from generic to LevelDB TripleStore")
}
qs.AddTripleSet(makeTripleSet())
w.AddQuadSet(makeTripleSet())
if s := qs.Size(); s != 11 {
t.Errorf("Unexpected triplestore size, got:%d expect:11", s)
}
@ -168,7 +171,7 @@ func TestLoadDatabase(t *testing.T) {
t.Errorf("Unexpected triplestore size, got:%d expect:5", s)
}
qs.RemoveTriple(quad.Quad{"A", "follows", "B", ""})
w.RemoveQuad(quad.Quad{"A", "follows", "B", ""})
if s := qs.Size(); s != 10 {
t.Errorf("Unexpected triplestore size after RemoveTriple, got:%d expect:10", s)
}
@ -196,7 +199,9 @@ func TestIterator(t *testing.T) {
if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.")
}
qs.AddTripleSet(makeTripleSet())
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
var it graph.Iterator
it = qs.NodesAllIterator()
@ -291,7 +296,8 @@ func TestSetIterator(t *testing.T) {
}
defer qs.Close()
qs.AddTripleSet(makeTripleSet())
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
expect := []quad.Quad{
{"C", "follows", "B", ""},
@ -403,7 +409,9 @@ func TestOptimize(t *testing.T) {
if qs == nil || err != nil {
t.Error("Failed to create leveldb TripleStore.")
}
qs.AddTripleSet(makeTripleSet())
w, _ := writer.NewSingleReplication(qs, nil)
w.AddQuadSet(makeTripleSet())
// With an linksto-fixed pair
fixed := qs.FixedIterator()

View file

@ -19,6 +19,7 @@ import (
"crypto/sha1"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"hash"
"sync"
@ -62,6 +63,7 @@ type TripleStore struct {
path string
open bool
size int64
horizon int64
writeopts *opt.WriteOptions
readopts *opt.ReadOptions
}
@ -85,6 +87,7 @@ func createNewLevelDB(path string, _ graph.Options) error {
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) {
var qs TripleStore
var err error
qs.path = path
cache_size := DefaultCacheSize
if val, ok := options.IntKey("cache_size_mb"); ok {
@ -106,11 +109,15 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro
qs.readopts = &opt.ReadOptions{}
db, err := leveldb.OpenFile(qs.path, qs.dbOpts)
if err != nil {
panic("Error, couldn't open! " + err.Error())
glog.Errorln("Error, couldn't open! ", err)
return nil, err
}
qs.db = db
glog.Infoln(qs.GetStats())
qs.getSize()
err = qs.getMetadata()
if err != nil {
return nil, err
}
return &qs, nil
}
@ -128,24 +135,25 @@ func (qs *TripleStore) Size() int64 {
return qs.size
}
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
func (qs *TripleStore) Horizon() int64 {
return qs.horizon
}
func (qa *TripleStore) createDeltaKeyFor(d graph.Delta) []byte {
key := make([]byte, 0, 19)
key = append(key, 'd')
key = append(key, []byte(fmt.Sprintf("%018x", d.ID))...)
return key
}
func (qs *TripleStore) createKeyFor(d [4]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(hashSize*3))
// TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
return key
}
func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
key := make([]byte, 0, 2+(hashSize*4))
// TODO(kortschak) Remove dependence on String() method.
key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...)
key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...)
key = append(key, qs.convertStringToByteHash(triple.Get(d[3]))...)
return key
}
@ -156,76 +164,98 @@ func (qs *TripleStore) createValueKeyFor(s string) []byte {
return key
}
func (qs *TripleStore) AddTriple(t quad.Quad) {
batch := &leveldb.Batch{}
qs.buildWrite(batch, t)
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Errorf("Couldn't write to DB for triple %s.", t)
return
}
qs.size++
type IndexEntry struct {
quad.Quad
History []int64
}
// Short hand for direction permutations.
var (
spo = [3]quad.Direction{quad.Subject, quad.Predicate, quad.Object}
osp = [3]quad.Direction{quad.Object, quad.Subject, quad.Predicate}
pos = [3]quad.Direction{quad.Predicate, quad.Object, quad.Subject}
pso = [3]quad.Direction{quad.Predicate, quad.Subject, quad.Object}
spo = [4]quad.Direction{quad.Subject, quad.Predicate, quad.Object, quad.Label}
osp = [4]quad.Direction{quad.Object, quad.Subject, quad.Predicate, quad.Label}
pos = [4]quad.Direction{quad.Predicate, quad.Object, quad.Subject, quad.Label}
cps = [4]quad.Direction{quad.Label, quad.Predicate, quad.Subject, quad.Object}
)
func (qs *TripleStore) RemoveTriple(t quad.Quad) {
_, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Couldn't access DB to confirm deletion")
return
}
if err == leveldb.ErrNotFound {
// No such triple in the database, forget about it.
return
}
func (qs *TripleStore) ApplyDeltas(deltas []graph.Delta) error {
batch := &leveldb.Batch{}
batch.Delete(qs.createKeyFor(spo, t))
batch.Delete(qs.createKeyFor(osp, t))
batch.Delete(qs.createKeyFor(pos, t))
qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch)
qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch)
qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch)
if t.Get(quad.Label) != "" {
batch.Delete(qs.createProvKeyFor(pso, t))
qs.UpdateValueKeyBy(t.Get(quad.Label), -1, batch)
resizeMap := make(map[string]int64)
size_change := int64(0)
for _, d := range deltas {
bytes, err := json.Marshal(d)
if err != nil {
return err
}
batch.Put(qs.createDeltaKeyFor(d), bytes)
err = qs.buildQuadWrite(batch, d.Quad, d.ID, d.Action == graph.Add)
if err != nil {
return err
}
delta := int64(1)
if d.Action == graph.Delete {
delta = int64(-1)
}
resizeMap[d.Quad.Subject] += delta
resizeMap[d.Quad.Predicate] += delta
resizeMap[d.Quad.Object] += delta
if d.Quad.Label != "" {
resizeMap[d.Quad.Label] += delta
}
size_change += delta
qs.horizon = d.ID
}
err = qs.db.Write(batch, nil)
for k, v := range resizeMap {
if v != 0 {
err := qs.UpdateValueKeyBy(k, v, batch)
if err != nil {
return err
}
}
}
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Errorf("Couldn't delete triple %s.", t)
return
glog.Error("Couldn't write to DB for tripleset.")
return err
}
qs.size--
qs.size += size_change
return nil
}
func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) {
bytes, err := json.Marshal(t)
if err != nil {
glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err)
return
func (qs *TripleStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error {
var entry IndexEntry
data, err := qs.db.Get(qs.createKeyFor(spo, q), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
glog.Error("Couldn't access DB to prepare index: ", err)
return err
}
batch.Put(qs.createKeyFor(spo, t), bytes)
batch.Put(qs.createKeyFor(osp, t), bytes)
batch.Put(qs.createKeyFor(pos, t), bytes)
if t.Get(quad.Label) != "" {
batch.Put(qs.createProvKeyFor(pso, t), bytes)
if err == nil {
// We got something.
err = json.Unmarshal(data, &entry)
if err != nil {
return err
}
} else {
entry.Quad = q
}
}
entry.History = append(entry.History, id)
func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) {
qs.buildTripleWrite(batch, t)
qs.UpdateValueKeyBy(t.Get(quad.Subject), 1, nil)
qs.UpdateValueKeyBy(t.Get(quad.Predicate), 1, nil)
qs.UpdateValueKeyBy(t.Get(quad.Object), 1, nil)
if t.Get(quad.Label) != "" {
qs.UpdateValueKeyBy(t.Get(quad.Label), 1, nil)
if isAdd && len(entry.History)%2 == 0 {
glog.Error("Entry History is out of sync for", entry)
return errors.New("Odd index history")
}
bytes, err := json.Marshal(entry)
if err != nil {
glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
return err
}
batch.Put(qs.createKeyFor(spo, q), bytes)
batch.Put(qs.createKeyFor(osp, q), bytes)
batch.Put(qs.createKeyFor(pos, q), bytes)
if q.Get(quad.Label) != "" {
batch.Put(qs.createKeyFor(cps, q), bytes)
}
return nil
}
type ValueData struct {
@ -233,15 +263,15 @@ type ValueData struct {
Size int64
}
func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.Batch) {
value := &ValueData{name, int64(amount)}
func (qs *TripleStore) UpdateValueKeyBy(name string, amount int64, batch *leveldb.Batch) error {
value := &ValueData{name, amount}
key := qs.createValueKeyFor(name)
b, err := qs.db.Get(key, qs.readopts)
// Error getting the node from the database.
if err != nil && err != leveldb.ErrNotFound {
glog.Errorf("Error reading Value %s from the DB.", name)
return
return err
}
// Node exists in the database -- unmarshal and update.
@ -249,58 +279,28 @@ func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.
err = json.Unmarshal(b, value)
if err != nil {
glog.Errorf("Error: couldn't reconstruct value: %v", err)
return
return err
}
value.Size += int64(amount)
value.Size += amount
}
// Are we deleting something?
if amount < 0 {
if value.Size <= 0 {
if batch == nil {
qs.db.Delete(key, qs.writeopts)
} else {
batch.Delete(key)
}
return
}
if value.Size <= 0 {
value.Size = 0
}
// Repackage and rewrite.
bytes, err := json.Marshal(&value)
if err != nil {
glog.Errorf("Couldn't write to buffer for value %s: %s", name, err)
return
return err
}
if batch == nil {
qs.db.Put(key, bytes, qs.writeopts)
} else {
batch.Put(key, bytes)
}
}
func (qs *TripleStore) AddTripleSet(t_s []quad.Quad) {
batch := &leveldb.Batch{}
newTs := len(t_s)
resizeMap := make(map[string]int)
for _, t := range t_s {
qs.buildTripleWrite(batch, t)
resizeMap[t.Subject]++
resizeMap[t.Predicate]++
resizeMap[t.Object]++
if t.Label != "" {
resizeMap[t.Label]++
}
}
for k, v := range resizeMap {
qs.UpdateValueKeyBy(k, v, batch)
}
err := qs.db.Write(batch, qs.writeopts)
if err != nil {
glog.Error("Couldn't write to DB for tripleset.")
return
}
qs.size += int64(newTs)
return nil
}
func (qs *TripleStore) Close() {
@ -314,6 +314,16 @@ func (qs *TripleStore) Close() {
} else {
glog.Errorf("Couldn't convert size before closing!")
}
buf.Reset()
err = binary.Write(buf, binary.LittleEndian, qs.horizon)
if err == nil {
werr := qs.db.Put([]byte("__horizon"), buf.Bytes(), qs.writeopts)
if werr != nil {
glog.Error("Couldn't write horizon before closing!")
}
} else {
glog.Errorf("Couldn't convert horizon before closing!")
}
qs.db.Close()
qs.open = false
}
@ -386,23 +396,34 @@ func (qs *TripleStore) SizeOf(k graph.Value) int64 {
return int64(qs.valueData(k.(Token)).Size)
}
func (qs *TripleStore) getSize() {
var size int64
b, err := qs.db.Get([]byte("__size"), qs.readopts)
func (qs *TripleStore) getInt64ForKey(key string, empty int64) (int64, error) {
var out int64
b, err := qs.db.Get([]byte(key), qs.readopts)
if err != nil && err != leveldb.ErrNotFound {
panic("Couldn't read size " + err.Error())
glog.Errorln("Couldn't read " + key + ": " + err.Error())
return 0, err
}
if err == leveldb.ErrNotFound {
// Must be a new database. Cool
qs.size = 0
return
return empty, nil
}
buf := bytes.NewBuffer(b)
err = binary.Read(buf, binary.LittleEndian, &size)
err = binary.Read(buf, binary.LittleEndian, &out)
if err != nil {
glog.Errorln("Error: couldn't parse size")
glog.Errorln("Error: couldn't parse", key)
return 0, err
}
qs.size = size
return out, nil
}
func (qs *TripleStore) getMetadata() error {
var err error
qs.size, err = qs.getInt64ForKey("__size", 0)
if err != nil {
return err
}
qs.horizon, err = qs.getInt64ForKey("__horizon", 0)
return err
}
func (qs *TripleStore) SizeOfPrefix(pre []byte) (int64, error) {

View file

@ -24,19 +24,22 @@ type AllIterator struct {
ts *TripleStore
}
func NewMemstoreAllIterator(ts *TripleStore) *AllIterator {
var out AllIterator
type NodesAllIterator AllIterator
type QuadsAllIterator AllIterator
func NewMemstoreNodesAllIterator(ts *TripleStore) *NodesAllIterator {
var out NodesAllIterator
out.Int64 = *iterator.NewInt64(1, ts.idCounter-1)
out.ts = ts
return &out
}
// No subiterators.
func (it *AllIterator) SubIterators() []graph.Iterator {
func (it *NodesAllIterator) SubIterators() []graph.Iterator {
return nil
}
func (it *AllIterator) Next() bool {
func (it *NodesAllIterator) Next() bool {
if !it.Int64.Next() {
return false
}
@ -46,3 +49,21 @@ func (it *AllIterator) Next() bool {
}
return true
}
func NewMemstoreQuadsAllIterator(ts *TripleStore) *QuadsAllIterator {
var out QuadsAllIterator
out.Int64 = *iterator.NewInt64(1, ts.quadIdCounter-1)
out.ts = ts
return &out
}
func (qit *QuadsAllIterator) Next() bool {
out := qit.Int64.Next()
if out {
i64 := qit.Int64.Result().(int64)
if qit.ts.log[i64].DeletedBy != 0 || qit.ts.log[i64].Action == graph.Delete {
return qit.Next()
}
}
return out
}

972
graph/memstore/b/keys.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,396 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package b
import (
"math"
"runtime/debug"
"testing"
"github.com/cznic/mathutil"
)
func rng() *mathutil.FC32 {
x, err := mathutil.NewFC32(math.MinInt32/4, math.MaxInt32/4, false)
if err != nil {
panic(err)
}
return x
}
func cmp(a, b int64) int {
return int(a - b)
}
func BenchmarkSetSeq1e3(b *testing.B) {
benchmarkSetSeq(b, 1e3)
}
func BenchmarkSetSeq1e4(b *testing.B) {
benchmarkSetSeq(b, 1e4)
}
func BenchmarkSetSeq1e5(b *testing.B) {
benchmarkSetSeq(b, 1e5)
}
func BenchmarkSetSeq1e6(b *testing.B) {
benchmarkSetSeq(b, 1e6)
}
func benchmarkSetSeq(b *testing.B, n int) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
r := TreeNew(cmp)
debug.FreeOSMemory()
b.StartTimer()
for j := int64(0); j < int64(n); j++ {
r.Set(j, struct{}{})
}
b.StopTimer()
r.Close()
}
b.StopTimer()
}
func BenchmarkGetSeq1e3(b *testing.B) {
benchmarkGetSeq(b, 1e3)
}
func BenchmarkGetSeq1e4(b *testing.B) {
benchmarkGetSeq(b, 1e4)
}
func BenchmarkGetSeq1e5(b *testing.B) {
benchmarkGetSeq(b, 1e5)
}
func BenchmarkGetSeq1e6(b *testing.B) {
benchmarkGetSeq(b, 1e6)
}
func benchmarkGetSeq(b *testing.B, n int) {
r := TreeNew(cmp)
for i := int64(0); i < int64(n); i++ {
r.Set(i, struct{}{})
}
debug.FreeOSMemory()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for j := int64(0); j < int64(n); j++ {
r.Get(j)
}
}
b.StopTimer()
r.Close()
}
func BenchmarkSetRnd1e3(b *testing.B) {
benchmarkSetRnd(b, 1e3)
}
func BenchmarkSetRnd1e4(b *testing.B) {
benchmarkSetRnd(b, 1e4)
}
func BenchmarkSetRnd1e5(b *testing.B) {
benchmarkSetRnd(b, 1e5)
}
func BenchmarkSetRnd1e6(b *testing.B) {
benchmarkSetRnd(b, 1e6)
}
func benchmarkSetRnd(b *testing.B, n int) {
rng := rng()
a := make([]int, n)
for i := range a {
a[i] = rng.Next()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
r := TreeNew(cmp)
debug.FreeOSMemory()
b.StartTimer()
for _, v := range a {
r.Set(int64(v), struct{}{})
}
b.StopTimer()
r.Close()
}
b.StopTimer()
}
func BenchmarkGetRnd1e3(b *testing.B) {
benchmarkGetRnd(b, 1e3)
}
func BenchmarkGetRnd1e4(b *testing.B) {
benchmarkGetRnd(b, 1e4)
}
func BenchmarkGetRnd1e5(b *testing.B) {
benchmarkGetRnd(b, 1e5)
}
func BenchmarkGetRnd1e6(b *testing.B) {
benchmarkGetRnd(b, 1e6)
}
func benchmarkGetRnd(b *testing.B, n int) {
r := TreeNew(cmp)
rng := rng()
a := make([]int64, n)
for i := range a {
a[i] = int64(rng.Next())
}
for _, v := range a {
r.Set(v, struct{}{})
}
debug.FreeOSMemory()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, v := range a {
r.Get(v)
}
}
b.StopTimer()
r.Close()
}
func BenchmarkDelSeq1e3(b *testing.B) {
benchmarkDelSeq(b, 1e3)
}
func BenchmarkDelSeq1e4(b *testing.B) {
benchmarkDelSeq(b, 1e4)
}
func BenchmarkDelSeq1e5(b *testing.B) {
benchmarkDelSeq(b, 1e5)
}
func BenchmarkDelSeq1e6(b *testing.B) {
benchmarkDelSeq(b, 1e6)
}
func benchmarkDelSeq(b *testing.B, n int) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
r := TreeNew(cmp)
for j := int64(0); j < int64(n); j++ {
r.Set(j, struct{}{})
}
debug.FreeOSMemory()
b.StartTimer()
for j := int64(0); j < int64(n); j++ {
r.Delete(j)
}
}
b.StopTimer()
}
func BenchmarkDelRnd1e3(b *testing.B) {
benchmarkDelRnd(b, 1e3)
}
func BenchmarkDelRnd1e4(b *testing.B) {
benchmarkDelRnd(b, 1e4)
}
func BenchmarkDelRnd1e5(b *testing.B) {
benchmarkDelRnd(b, 1e5)
}
func BenchmarkDelRnd1e6(b *testing.B) {
benchmarkDelRnd(b, 1e6)
}
func benchmarkDelRnd(b *testing.B, n int) {
rng := rng()
a := make([]int64, n)
for i := range a {
a[i] = int64(rng.Next())
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
r := TreeNew(cmp)
for _, v := range a {
r.Set(v, struct{}{})
}
debug.FreeOSMemory()
b.StartTimer()
for _, v := range a {
r.Delete(v)
}
b.StopTimer()
r.Close()
}
b.StopTimer()
}
func BenchmarkSeekSeq1e3(b *testing.B) {
benchmarkSeekSeq(b, 1e3)
}
func BenchmarkSeekSeq1e4(b *testing.B) {
benchmarkSeekSeq(b, 1e4)
}
func BenchmarkSeekSeq1e5(b *testing.B) {
benchmarkSeekSeq(b, 1e5)
}
func BenchmarkSeekSeq1e6(b *testing.B) {
benchmarkSeekSeq(b, 1e6)
}
func benchmarkSeekSeq(b *testing.B, n int) {
for i := 0; i < b.N; i++ {
b.StopTimer()
t := TreeNew(cmp)
for j := int64(0); j < int64(n); j++ {
t.Set(j, struct{}{})
}
debug.FreeOSMemory()
b.StartTimer()
for j := int64(0); j < int64(n); j++ {
e, _ := t.Seek(j)
e.Close()
}
b.StopTimer()
t.Close()
}
b.StopTimer()
}
func BenchmarkSeekRnd1e3(b *testing.B) {
benchmarkSeekRnd(b, 1e3)
}
func BenchmarkSeekRnd1e4(b *testing.B) {
benchmarkSeekRnd(b, 1e4)
}
func BenchmarkSeekRnd1e5(b *testing.B) {
benchmarkSeekRnd(b, 1e5)
}
func BenchmarkSeekRnd1e6(b *testing.B) {
benchmarkSeekRnd(b, 1e6)
}
func benchmarkSeekRnd(b *testing.B, n int) {
r := TreeNew(cmp)
rng := rng()
a := make([]int64, n)
for i := range a {
a[i] = int64(rng.Next())
}
for _, v := range a {
r.Set(v, struct{}{})
}
debug.FreeOSMemory()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, v := range a {
e, _ := r.Seek(v)
e.Close()
}
}
b.StopTimer()
r.Close()
}
func BenchmarkNext1e3(b *testing.B) {
benchmarkNext(b, 1e3)
}
func BenchmarkNext1e4(b *testing.B) {
benchmarkNext(b, 1e4)
}
func BenchmarkNext1e5(b *testing.B) {
benchmarkNext(b, 1e5)
}
func BenchmarkNext1e6(b *testing.B) {
benchmarkNext(b, 1e6)
}
func benchmarkNext(b *testing.B, n int) {
t := TreeNew(cmp)
for i := int64(0); i < int64(n); i++ {
t.Set(i, struct{}{})
}
debug.FreeOSMemory()
b.ResetTimer()
for i := 0; i < b.N; i++ {
en, err := t.SeekFirst()
if err != nil {
b.Fatal(err)
}
m := 0
for {
if _, _, err = en.Next(); err != nil {
break
}
m++
}
if m != n {
b.Fatal(m)
}
}
b.StopTimer()
t.Close()
}
func BenchmarkPrev1e3(b *testing.B) {
benchmarkPrev(b, 1e3)
}
func BenchmarkPrev1e4(b *testing.B) {
benchmarkPrev(b, 1e4)
}
func BenchmarkPrev1e5(b *testing.B) {
benchmarkPrev(b, 1e5)
}
func BenchmarkPrev1e6(b *testing.B) {
benchmarkPrev(b, 1e6)
}
func benchmarkPrev(b *testing.B, n int) {
t := TreeNew(cmp)
for i := int64(0); i < int64(n); i++ {
t.Set(i, struct{}{})
}
debug.FreeOSMemory()
b.ResetTimer()
for i := 0; i < b.N; i++ {
en, err := t.SeekLast()
if err != nil {
b.Fatal(err)
}
m := 0
for {
if _, _, err = en.Prev(); err != nil {
break
}
m++
}
if m != n {
b.Fatal(m)
}
}
}

View file

@ -19,47 +19,36 @@ import (
"math"
"strings"
"github.com/petar/GoLLRB/llrb"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
)
type Iterator struct {
uid uint64
tags graph.Tagger
tree *llrb.LLRB
data string
isRunning bool
iterLast Int64
result graph.Value
uid uint64
ts *TripleStore
tags graph.Tagger
tree *b.Tree
iter *b.Enumerator
data string
result graph.Value
}
type Int64 int64
func (i Int64) Less(than llrb.Item) bool {
return i < than.(Int64)
func cmp(a, b int64) int {
return int(a - b)
}
func IterateOne(tree *llrb.LLRB, last Int64) Int64 {
var next Int64
tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool {
if i.(Int64) == last {
return true
} else {
next = i.(Int64)
return false
}
})
return next
}
func NewLlrbIterator(tree *llrb.LLRB, data string) *Iterator {
func NewIterator(tree *b.Tree, data string, ts *TripleStore) *Iterator {
iter, err := tree.SeekFirst()
if err != nil {
iter = nil
}
return &Iterator{
uid: iterator.NextUID(),
tree: tree,
iterLast: Int64(-1),
data: data,
uid: iterator.NextUID(),
ts: ts,
tree: tree,
iter: iter,
data: data,
}
}
@ -68,7 +57,11 @@ func (it *Iterator) UID() uint64 {
}
func (it *Iterator) Reset() {
it.iterLast = Int64(-1)
var err error
it.iter, err = it.tree.SeekFirst()
if err != nil {
it.iter = nil
}
}
func (it *Iterator) Tagger() *graph.Tagger {
@ -86,20 +79,53 @@ func (it *Iterator) TagResults(dst map[string]graph.Value) {
}
func (it *Iterator) Clone() graph.Iterator {
m := NewLlrbIterator(it.tree, it.data)
var iter *b.Enumerator
if it.result != nil {
var ok bool
iter, ok = it.tree.Seek(it.result.(int64))
if !ok {
panic("value unexpectedly missing")
}
} else {
var err error
iter, err = it.tree.SeekFirst()
if err != nil {
iter = nil
}
}
m := &Iterator{
uid: iterator.NextUID(),
ts: it.ts,
tree: it.tree,
iter: iter,
data: it.data,
}
m.tags.CopyFrom(it)
return m
}
func (it *Iterator) Close() {}
func (it *Iterator) checkValid(index int64) bool {
return it.ts.log[index].DeletedBy == 0
}
func (it *Iterator) Next() bool {
graph.NextLogIn(it)
if it.tree.Max() == nil || it.result == int64(it.tree.Max().(Int64)) {
if it.iter == nil {
return graph.NextLogOut(it, nil, false)
}
it.iterLast = IterateOne(it.tree, it.iterLast)
it.result = int64(it.iterLast)
result, _, err := it.iter.Next()
if err != nil {
return graph.NextLogOut(it, nil, false)
}
if !it.checkValid(result) {
return it.Next()
}
it.result = result
return graph.NextLogOut(it, it.result, true)
}
@ -126,7 +152,7 @@ func (it *Iterator) Size() (int64, bool) {
func (it *Iterator) Contains(v graph.Value) bool {
graph.ContainsLogIn(it, v)
if it.tree.Has(Int64(v.(int64))) {
if _, ok := it.tree.Get(v.(int64)); ok {
it.result = v
return graph.ContainsLogOut(it, v, true)
}
@ -141,7 +167,7 @@ func (it *Iterator) DebugString(indent int) string {
var memType graph.Type
func init() {
memType = graph.RegisterIterator("llrb")
memType = graph.RegisterIterator("b+tree")
}
func Type() graph.Type { return memType }

View file

@ -18,11 +18,11 @@ import (
"fmt"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/graph/memstore/b"
"github.com/google/cayley/quad"
"github.com/petar/GoLLRB/llrb"
)
func init() {
@ -31,129 +31,131 @@ func init() {
}, nil)
}
type TripleDirectionIndex struct {
subject map[int64]*llrb.LLRB
predicate map[int64]*llrb.LLRB
object map[int64]*llrb.LLRB
label map[int64]*llrb.LLRB
type QuadDirectionIndex struct {
index [4]map[int64]*b.Tree
}
func NewTripleDirectionIndex() *TripleDirectionIndex {
var tdi TripleDirectionIndex
tdi.subject = make(map[int64]*llrb.LLRB)
tdi.predicate = make(map[int64]*llrb.LLRB)
tdi.object = make(map[int64]*llrb.LLRB)
tdi.label = make(map[int64]*llrb.LLRB)
return &tdi
func NewQuadDirectionIndex() QuadDirectionIndex {
return QuadDirectionIndex{[...]map[int64]*b.Tree{
quad.Subject - 1: make(map[int64]*b.Tree),
quad.Predicate - 1: make(map[int64]*b.Tree),
quad.Object - 1: make(map[int64]*b.Tree),
quad.Label - 1: make(map[int64]*b.Tree),
}}
}
func (tdi *TripleDirectionIndex) GetForDir(d quad.Direction) map[int64]*llrb.LLRB {
switch d {
case quad.Subject:
return tdi.subject
case quad.Object:
return tdi.object
case quad.Predicate:
return tdi.predicate
case quad.Label:
return tdi.label
func (qdi QuadDirectionIndex) Tree(d quad.Direction, id int64) *b.Tree {
if d < quad.Subject || d > quad.Label {
panic("illegal direction")
}
panic("illegal direction")
}
func (tdi *TripleDirectionIndex) GetOrCreate(d quad.Direction, id int64) *llrb.LLRB {
directionIndex := tdi.GetForDir(d)
if _, ok := directionIndex[id]; !ok {
directionIndex[id] = llrb.New()
tree, ok := qdi.index[d-1][id]
if !ok {
tree = b.TreeNew(cmp)
qdi.index[d-1][id] = tree
}
return directionIndex[id]
return tree
}
func (tdi *TripleDirectionIndex) Get(d quad.Direction, id int64) (*llrb.LLRB, bool) {
directionIndex := tdi.GetForDir(d)
tree, exists := directionIndex[id]
return tree, exists
func (qdi QuadDirectionIndex) Get(d quad.Direction, id int64) (*b.Tree, bool) {
if d < quad.Subject || d > quad.Label {
panic("illegal direction")
}
tree, ok := qdi.index[d-1][id]
return tree, ok
}
type LogEntry struct {
graph.Delta
DeletedBy int64
}
type TripleStore struct {
idCounter int64
tripleIdCounter int64
idMap map[string]int64
revIdMap map[int64]string
triples []quad.Quad
size int64
index TripleDirectionIndex
// vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree
idCounter int64
quadIdCounter int64
idMap map[string]int64
revIdMap map[int64]string
log []LogEntry
size int64
index QuadDirectionIndex
// vip_index map[string]map[int64]map[string]map[int64]*b.Tree
}
func newTripleStore() *TripleStore {
var ts TripleStore
ts.idMap = make(map[string]int64)
ts.revIdMap = make(map[int64]string)
ts.triples = make([]quad.Quad, 1, 200)
return &TripleStore{
idMap: make(map[string]int64),
revIdMap: make(map[int64]string),
// Sentinel null triple so triple indices start at 1
ts.triples[0] = quad.Quad{}
ts.size = 1
ts.index = *NewTripleDirectionIndex()
ts.idCounter = 1
ts.tripleIdCounter = 1
return &ts
}
// Sentinel null entry so indices start at 1
log: make([]LogEntry, 1, 200),
func (ts *TripleStore) AddTripleSet(triples []quad.Quad) {
for _, t := range triples {
ts.AddTriple(t)
index: NewQuadDirectionIndex(),
idCounter: 1,
quadIdCounter: 1,
}
}
func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) {
smallest := -1
var smallest_tree *llrb.LLRB
func (ts *TripleStore) ApplyDeltas(deltas []graph.Delta) error {
for _, d := range deltas {
var err error
if d.Action == graph.Add {
err = ts.AddDelta(d)
} else {
err = ts.RemoveDelta(d)
}
if err != nil {
return err
}
}
return nil
}
const maxInt = int(^uint(0) >> 1)
func (ts *TripleStore) indexOf(t quad.Quad) (int64, bool) {
min := maxInt
var tree *b.Tree
for d := quad.Subject; d <= quad.Label; d++ {
sid := t.Get(d)
if d == quad.Label && sid == "" {
continue
}
id, ok := ts.idMap[sid]
// If we've never heard about a node, it most not exist
// If we've never heard about a node, it must not exist
if !ok {
return false, 0
return 0, false
}
index, exists := ts.index.Get(d, id)
if !exists {
index, ok := ts.index.Get(d, id)
if !ok {
// If it's never been indexed in this direction, it can't exist.
return false, 0
return 0, false
}
if smallest == -1 || index.Len() < smallest {
smallest = index.Len()
smallest_tree = index
if l := index.Len(); l < min {
min, tree = l, index
}
}
it := NewLlrbIterator(smallest_tree, "")
it := NewIterator(tree, "", ts)
for it.Next() {
val := it.Result()
if t == ts.triples[val.(int64)] {
return true, val.(int64)
if t == ts.log[val.(int64)].Quad {
return val.(int64), true
}
}
return false, 0
return 0, false
}
func (ts *TripleStore) AddTriple(t quad.Quad) {
if exists, _ := ts.tripleExists(t); exists {
return
func (ts *TripleStore) AddDelta(d graph.Delta) error {
if _, exists := ts.indexOf(d.Quad); exists {
return graph.ErrQuadExists
}
var tripleID int64
ts.triples = append(ts.triples, t)
tripleID = ts.tripleIdCounter
qid := ts.quadIdCounter
ts.log = append(ts.log, LogEntry{Delta: d})
ts.size++
ts.tripleIdCounter++
ts.quadIdCounter++
for d := quad.Subject; d <= quad.Label; d++ {
sid := t.Get(d)
if d == quad.Label && sid == "" {
for dir := quad.Subject; dir <= quad.Label; dir++ {
sid := d.Quad.Get(dir)
if dir == quad.Label && sid == "" {
continue
}
if _, ok := ts.idMap[sid]; !ok {
@ -163,87 +165,60 @@ func (ts *TripleStore) AddTriple(t quad.Quad) {
}
}
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
for dir := quad.Subject; dir <= quad.Label; dir++ {
if dir == quad.Label && d.Quad.Get(dir) == "" {
continue
}
id := ts.idMap[t.Get(d)]
tree := ts.index.GetOrCreate(d, id)
tree.ReplaceOrInsert(Int64(tripleID))
id := ts.idMap[d.Quad.Get(dir)]
tree := ts.index.Tree(dir, id)
tree.Set(qid, struct{}{})
}
// TODO(barakmich): Add VIP indexing
return nil
}
func (ts *TripleStore) RemoveTriple(t quad.Quad) {
var tripleID int64
var exists bool
tripleID = 0
if exists, tripleID = ts.tripleExists(t); !exists {
return
func (ts *TripleStore) RemoveDelta(d graph.Delta) error {
prevQuadID, exists := ts.indexOf(d.Quad)
if !exists {
return graph.ErrQuadNotExist
}
ts.triples[tripleID] = quad.Quad{}
quadID := ts.quadIdCounter
ts.log = append(ts.log, LogEntry{Delta: d})
ts.log[prevQuadID].DeletedBy = quadID
ts.size--
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
continue
}
id := ts.idMap[t.Get(d)]
tree := ts.index.GetOrCreate(d, id)
tree.Delete(Int64(tripleID))
}
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
continue
}
id, ok := ts.idMap[t.Get(d)]
if !ok {
continue
}
stillExists := false
for d := quad.Subject; d <= quad.Label; d++ {
if d == quad.Label && t.Get(d) == "" {
continue
}
nodeTree := ts.index.GetOrCreate(d, id)
if nodeTree.Len() != 0 {
stillExists = true
break
}
}
if !stillExists {
delete(ts.idMap, t.Get(d))
delete(ts.revIdMap, id)
}
}
ts.quadIdCounter++
return nil
}
func (ts *TripleStore) Quad(index graph.Value) quad.Quad {
return ts.triples[index.(int64)]
return ts.log[index.(int64)].Quad
}
func (ts *TripleStore) TripleIterator(d quad.Direction, value graph.Value) graph.Iterator {
index, ok := ts.index.Get(d, value.(int64))
data := fmt.Sprintf("dir:%s val:%d", d, value.(int64))
if ok {
return NewLlrbIterator(index, data)
return NewIterator(index, data, ts)
}
return &iterator.Null{}
}
func (ts *TripleStore) Horizon() int64 {
return ts.log[len(ts.log)-1].ID
}
func (ts *TripleStore) Size() int64 {
return ts.size - 1 // Don't count the sentinel
return ts.size
}
func (ts *TripleStore) DebugPrint() {
for i, t := range ts.triples {
for i, l := range ts.log {
if i == 0 {
continue
}
glog.V(2).Infof("%d: %s", i, t)
glog.V(2).Infof("%d: %#v", i, l)
}
}
@ -256,7 +231,7 @@ func (ts *TripleStore) NameOf(id graph.Value) string {
}
func (ts *TripleStore) TriplesAllIterator() graph.Iterator {
return iterator.NewInt64(0, ts.Size())
return NewMemstoreQuadsAllIterator(ts)
}
func (ts *TripleStore) FixedIterator() graph.FixedIterator {
@ -269,6 +244,7 @@ func (ts *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph.
}
func (ts *TripleStore) NodesAllIterator() graph.Iterator {
return NewMemstoreAllIterator(ts)
return NewMemstoreNodesAllIterator(ts)
}
func (ts *TripleStore) Close() {}

View file

@ -22,6 +22,7 @@ import (
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
"github.com/google/cayley/writer"
)
// This is a simple test graph.
@ -51,13 +52,14 @@ var simpleGraph = []quad.Quad{
{"G", "status", "cool", "status_graph"},
}
func makeTestStore(data []quad.Quad) (*TripleStore, []pair) {
func makeTestStore(data []quad.Quad) (*TripleStore, graph.QuadWriter, []pair) {
seen := make(map[string]struct{})
ts := newTripleStore()
var (
val int64
ind []pair
)
writer, _ := writer.NewSingleReplication(ts, nil)
for _, t := range data {
for _, qp := range []string{t.Subject, t.Predicate, t.Object, t.Label} {
if _, ok := seen[qp]; !ok && qp != "" {
@ -66,9 +68,10 @@ func makeTestStore(data []quad.Quad) (*TripleStore, []pair) {
seen[qp] = struct{}{}
}
}
ts.AddTriple(t)
writer.AddQuad(t)
}
return ts, ind
return ts, writer, ind
}
type pair struct {
@ -77,7 +80,7 @@ type pair struct {
}
func TestMemstore(t *testing.T) {
ts, index := makeTestStore(simpleGraph)
ts, _, index := makeTestStore(simpleGraph)
if size := ts.Size(); size != int64(len(simpleGraph)) {
t.Errorf("Triple store has unexpected size, got:%d expected %d", size, len(simpleGraph))
}
@ -95,7 +98,7 @@ func TestMemstore(t *testing.T) {
}
func TestIteratorsAndNextResultOrderA(t *testing.T) {
ts, _ := makeTestStore(simpleGraph)
ts, _, _ := makeTestStore(simpleGraph)
fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("C"))
@ -144,7 +147,7 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) {
}
func TestLinksToOptimization(t *testing.T) {
ts, _ := makeTestStore(simpleGraph)
ts, _, _ := makeTestStore(simpleGraph)
fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("cool"))
@ -172,9 +175,9 @@ func TestLinksToOptimization(t *testing.T) {
}
func TestRemoveTriple(t *testing.T) {
ts, _ := makeTestStore(simpleGraph)
ts, w, _ := makeTestStore(simpleGraph)
ts.RemoveTriple(quad.Quad{"E", "follows", "F", ""})
w.RemoveQuad(quad.Quad{"E", "follows", "F", ""})
fixed := ts.FixedIterator()
fixed.Add(ts.ValueOf("E"))

View file

@ -45,17 +45,7 @@ type Iterator struct {
func NewIterator(qs *TripleStore, collection string, d quad.Direction, val graph.Value) *Iterator {
name := qs.NameOf(val)
var constraint bson.M
switch d {
case quad.Subject:
constraint = bson.M{"Subject": name}
case quad.Predicate:
constraint = bson.M{"Predicate": name}
case quad.Object:
constraint = bson.M{"Object": name}
case quad.Label:
constraint = bson.M{"Label": name}
}
constraint := bson.M{d.String(): name}
size, err := qs.db.C(collection).Find(constraint).Count()
if err != nil {
@ -140,10 +130,9 @@ func (it *Iterator) Clone() graph.Iterator {
func (it *Iterator) Next() bool {
var result struct {
Id string "_id"
//Sub string "Sub"
//Pred string "Pred"
//Obj string "Obj"
Id string "_id"
Added []int64 "Added"
Deleted []int64 "Deleted"
}
found := it.iter.Next(&result)
if !found {
@ -153,6 +142,9 @@ func (it *Iterator) Next() bool {
}
return false
}
if it.collection == "quads" && len(result.Added) <= len(result.Deleted) {
return it.Next()
}
it.result = result.Id
return true
}

View file

@ -18,7 +18,6 @@ import (
"crypto/sha1"
"encoding/hex"
"hash"
"io"
"sync"
"gopkg.in/mgo.v2"
@ -34,9 +33,6 @@ func init() {
graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph)
}
// Guarantee we satisfy graph.Bulkloader.
var _ graph.BulkLoader = (*TripleStore)(nil)
const DefaultDBName = "cayley"
var (
@ -64,19 +60,27 @@ func createNewMongoGraph(addr string, options graph.Options) error {
}
db := conn.DB(dbName)
indexOpts := mgo.Index{
Key: []string{"Sub"},
Key: []string{"subject"},
Unique: false,
DropDups: false,
Background: true,
Sparse: true,
}
db.C("triples").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Pred"}
db.C("triples").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Obj"}
db.C("triples").EnsureIndex(indexOpts)
indexOpts.Key = []string{"Label"}
db.C("triples").EnsureIndex(indexOpts)
db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"predicate"}
db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"object"}
db.C("quads").EnsureIndex(indexOpts)
indexOpts.Key = []string{"label"}
db.C("quads").EnsureIndex(indexOpts)
logOpts := mgo.Index{
Key: []string{"LogID"},
Unique: true,
DropDups: false,
Background: true,
Sparse: true,
}
db.C("log").EnsureIndex(logOpts)
return nil
}
@ -97,7 +101,7 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro
return &qs, nil
}
func (qs *TripleStore) getIdForTriple(t quad.Quad) string {
func (qs *TripleStore) getIdForQuad(t quad.Quad) string {
id := qs.convertStringToByteHash(t.Subject)
id += qs.convertStringToByteHash(t.Predicate)
id += qs.convertStringToByteHash(t.Object)
@ -122,125 +126,157 @@ type MongoNode struct {
Size int "Size"
}
func (qs *TripleStore) updateNodeBy(node_name string, inc int) {
var size MongoNode
node := qs.ValueOf(node_name)
err := qs.db.C("nodes").FindId(node).One(&size)
if err != nil {
if err.Error() == "not found" {
// Not found. Okay.
size.Id = node.(string)
size.Name = node_name
size.Size = inc
} else {
glog.Errorf("Error: %v", err)
return
}
} else {
size.Id = node.(string)
size.Name = node_name
size.Size += inc
}
// Removing something...
if inc < 0 {
if size.Size <= 0 {
err := qs.db.C("nodes").RemoveId(node)
if err != nil {
glog.Errorf("Error: %v while removing node %s", err, node_name)
return
}
}
}
_, err2 := qs.db.C("nodes").UpsertId(node, size)
if err2 != nil {
glog.Errorf("Error: %v", err)
}
type MongoLogEntry struct {
LogID int64 "LogID"
Action string "Action"
Key string "Key"
Timestamp int64
}
func (qs *TripleStore) writeTriple(t quad.Quad) bool {
tripledoc := bson.M{
"_id": qs.getIdForTriple(t),
"Subject": t.Subject,
"Predicate": t.Predicate,
"Object": t.Object,
"Label": t.Label,
func (qs *TripleStore) updateNodeBy(node_name string, inc int) error {
node := qs.ValueOf(node_name)
doc := bson.M{
"_id": node.(string),
"Name": node_name,
}
err := qs.db.C("triples").Insert(tripledoc)
upsert := bson.M{
"$setOnInsert": doc,
"$inc": bson.M{
"Size": inc,
},
}
_, err := qs.db.C("nodes").UpsertId(node, upsert)
if err != nil {
glog.Errorf("Error updating node: %v", err)
}
return err
}
func (qs *TripleStore) updateQuad(q quad.Quad, id int64, proc graph.Procedure) error {
var setname string
if proc == graph.Add {
setname = "Added"
} else if proc == graph.Delete {
setname = "Deleted"
}
upsert := bson.M{
"$setOnInsert": q,
"$push": bson.M{
setname: id,
},
}
_, err := qs.db.C("quads").UpsertId(qs.getIdForQuad(q), upsert)
if err != nil {
// Among the reasons I hate MongoDB. "Errors don't happen! Right guys?"
if err.(*mgo.LastError).Code == 11000 {
return false
}
glog.Errorf("Error: %v", err)
}
return err
}
func (qs *TripleStore) checkValid(key string) bool {
var indexEntry struct {
Added []int64 "Added"
Deleted []int64 "Deleted"
}
err := qs.db.C("quads").FindId(key).One(&indexEntry)
if err == mgo.ErrNotFound {
return false
}
if err != nil {
glog.Errorln("Other error checking valid quad: %s %v.", key, err)
return false
}
if len(indexEntry.Added) <= len(indexEntry.Deleted) {
return false
}
return true
}
func (qs *TripleStore) AddTriple(t quad.Quad) {
_ = qs.writeTriple(t)
qs.updateNodeBy(t.Subject, 1)
qs.updateNodeBy(t.Predicate, 1)
qs.updateNodeBy(t.Object, 1)
if t.Label != "" {
qs.updateNodeBy(t.Label, 1)
func (qs *TripleStore) updateLog(d graph.Delta) error {
var action string
if d.Action == graph.Add {
action = "Add"
} else {
action = "Delete"
}
entry := MongoLogEntry{
LogID: d.ID,
Action: action,
Key: qs.getIdForQuad(d.Quad),
Timestamp: d.Timestamp.UnixNano(),
}
err := qs.db.C("log").Insert(entry)
if err != nil {
glog.Errorf("Error updating log: %v", err)
}
return err
}
func (qs *TripleStore) AddTripleSet(in []quad.Quad) {
func (qs *TripleStore) ApplyDeltas(in []graph.Delta) error {
qs.session.SetSafe(nil)
ids := make(map[string]int)
for _, t := range in {
wrote := qs.writeTriple(t)
if wrote {
ids[t.Subject]++
ids[t.Object]++
ids[t.Predicate]++
if t.Label != "" {
ids[t.Label]++
// Pre-check the existence condition.
for _, d := range in {
key := qs.getIdForQuad(d.Quad)
switch d.Action {
case graph.Add:
if qs.checkValid(key) {
return graph.ErrQuadExists
}
case graph.Delete:
if !qs.checkValid(key) {
return graph.ErrQuadNotExist
}
}
}
if glog.V(2) {
glog.Infoln("Existence verified. Proceeding.")
}
for _, d := range in {
err := qs.updateLog(d)
if err != nil {
return err
}
}
for _, d := range in {
err := qs.updateQuad(d.Quad, d.ID, d.Action)
if err != nil {
return err
}
var countdelta int
if d.Action == graph.Add {
countdelta = 1
} else {
countdelta = -1
}
ids[d.Quad.Subject] += countdelta
ids[d.Quad.Object] += countdelta
ids[d.Quad.Predicate] += countdelta
if d.Quad.Label != "" {
ids[d.Quad.Label] += countdelta
}
}
for k, v := range ids {
qs.updateNodeBy(k, v)
err := qs.updateNodeBy(k, v)
if err != nil {
return err
}
}
qs.session.SetSafe(&mgo.Safe{})
}
func (qs *TripleStore) RemoveTriple(t quad.Quad) {
err := qs.db.C("triples").RemoveId(qs.getIdForTriple(t))
if err == mgo.ErrNotFound {
return
} else if err != nil {
glog.Errorf("Error: %v while removing triple %v", err, t)
return
}
qs.updateNodeBy(t.Subject, -1)
qs.updateNodeBy(t.Predicate, -1)
qs.updateNodeBy(t.Object, -1)
if t.Label != "" {
qs.updateNodeBy(t.Label, -1)
}
return nil
}
func (qs *TripleStore) Quad(val graph.Value) quad.Quad {
var bsonDoc bson.M
err := qs.db.C("triples").FindId(val.(string)).One(&bsonDoc)
var q quad.Quad
err := qs.db.C("quads").FindId(val.(string)).One(&q)
if err != nil {
glog.Errorf("Error: Couldn't retrieve triple %s %v", val, err)
}
return quad.Quad{
bsonDoc["Subject"].(string),
bsonDoc["Predicate"].(string),
bsonDoc["Object"].(string),
bsonDoc["Label"].(string),
glog.Errorf("Error: Couldn't retrieve quad %s %v", val, err)
}
return q
}
func (qs *TripleStore) TripleIterator(d quad.Direction, val graph.Value) graph.Iterator {
return NewIterator(qs, "triples", d, val)
return NewIterator(qs, "quads", d, val)
}
func (qs *TripleStore) NodesAllIterator() graph.Iterator {
@ -248,7 +284,7 @@ func (qs *TripleStore) NodesAllIterator() graph.Iterator {
}
func (qs *TripleStore) TriplesAllIterator() graph.Iterator {
return NewAllIterator(qs, "triples")
return NewAllIterator(qs, "quads")
}
func (qs *TripleStore) ValueOf(s string) graph.Value {
@ -270,7 +306,8 @@ func (qs *TripleStore) NameOf(v graph.Value) string {
}
func (qs *TripleStore) Size() int64 {
count, err := qs.db.C("triples").Count()
// TODO(barakmich): Make size real; store it in the log, and retrieve it.
count, err := qs.db.C("quads").Count()
if err != nil {
glog.Errorf("Error: %v", err)
return 0
@ -278,6 +315,18 @@ func (qs *TripleStore) Size() int64 {
return int64(count)
}
func (qs *TripleStore) Horizon() int64 {
var log MongoLogEntry
err := qs.db.C("log").Find(nil).Sort("-LogID").One(&log)
if err != nil {
if err == mgo.ErrNotFound {
return 0
}
glog.Errorf("Could not get Horizon from Mongo: %v", err)
}
return log.LogID
}
func compareStrings(a, b graph.Value) bool {
return a.(string) == b.(string)
}
@ -307,61 +356,4 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V
return val
}
func (qs *TripleStore) BulkLoad(dec quad.Unmarshaler) error {
if qs.Size() != 0 {
return graph.ErrCannotBulkLoad
}
qs.session.SetSafe(nil)
for {
q, err := dec.Unmarshal()
if err != nil {
if err != io.EOF {
return err
}
break
}
qs.writeTriple(q)
}
outputTo := bson.M{"replace": "nodes", "sharded": true}
glog.Infoln("Mapreducing")
job := mgo.MapReduce{
Map: `function() {
var len = this["_id"].length
var s_key = this["_id"].slice(0, len / 4)
var p_key = this["_id"].slice(len / 4, 2 * len / 4)
var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4)
var c_key = this["_id"].slice(3 * len / 4)
emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1})
emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1})
emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1})
if (this.Label != "") {
emit(c_key, {"_id": c_key, "Name" : this.Label, "Size" : 1})
}
}
`,
Reduce: `
function(key, value_list) {
out = {"_id": key, "Name": value_list[0].Name}
count = 0
for (var i = 0; i < value_list.length; i++) {
count = count + value_list[i].Size
}
out["Size"] = count
return out
}
`,
Out: outputTo,
}
qs.db.C("triples").Find(nil).MapReduce(&job, nil)
glog.Infoln("Fixing")
qs.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) {
db.nodes.update({"_id": result._id}, result.value)
}) }`}, {"args", bson.D{}}}, nil)
qs.session.SetSafe(&mgo.Safe{})
return nil
}
// TODO(barakmich): Rewrite bulk loader. For now, iterating around blocks is the way we'll go about it.

101
graph/quadwriter.go Normal file
View file

@ -0,0 +1,101 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package graph
// Defines the interface for consistent replication of a graph instance.
//
// Separate from the backend, this dictates how individual triples get
// identified and replicated consistently across (potentially) multiple
// instances. The simplest case is to keep an append-only log of triple
// changes.
import (
"errors"
"time"
"github.com/google/cayley/quad"
)
type Procedure byte
// The different types of actions a transaction can do.
const (
Add Procedure = iota
Delete
)
type Delta struct {
ID int64
Quad quad.Quad
Action Procedure
Timestamp time.Time
}
type Handle struct {
QuadStore TripleStore
QuadWriter QuadWriter
}
func (h *Handle) Close() {
h.QuadStore.Close()
h.QuadWriter.Close()
}
var (
ErrQuadExists = errors.New("Quad exists")
ErrQuadNotExist = errors.New("Quad doesn't exist")
)
type QuadWriter interface {
// Add a quad to the store.
AddQuad(quad.Quad) error
// Add a set of quads to the store, atomically if possible.
AddQuadSet([]quad.Quad) error
// Removes a quad matching the given one from the database,
// if it exists. Does nothing otherwise.
RemoveQuad(quad.Quad) error
// Cleans up replication and closes the writing aspect of the database.
Close() error
}
type NewQuadWriterFunc func(TripleStore, Options) (QuadWriter, error)
var writerRegistry = make(map[string]NewQuadWriterFunc)
func RegisterWriter(name string, newFunc NewQuadWriterFunc) {
if _, found := writerRegistry[name]; found {
panic("already registered TripleWriter " + name)
}
writerRegistry[name] = newFunc
}
func NewQuadWriter(name string, ts TripleStore, opts Options) (QuadWriter, error) {
newFunc, hasNew := writerRegistry[name]
if !hasNew {
return nil, errors.New("replication: name '" + name + "' is not registered")
}
return newFunc(ts, opts)
}
func WriterMethods() []string {
t := make([]string, 0, len(writerRegistry))
for n := range writerRegistry {
t = append(t, n)
}
return t
}

View file

@ -42,15 +42,9 @@ import (
type Value interface{}
type TripleStore interface {
// Add a triple to the store.
AddTriple(quad.Quad)
// Add a set of triples to the store, atomically if possible.
AddTripleSet([]quad.Quad)
// Removes a triple matching the given one from the database,
// if it exists. Does nothing otherwise.
RemoveTriple(quad.Quad)
// The only way in is through building a transaction, which
// is done by a replication strategy.
ApplyDeltas([]Delta) error
// Given an opaque token, returns the triple for that token from the store.
Quad(Value) quad.Quad
@ -75,6 +69,9 @@ type TripleStore interface {
// Returns the number of triples currently stored.
Size() int64
// The last replicated transaction ID that this triplestore has verified.
Horizon() int64
// Creates a fixed iterator which can compare Values
FixedIterator() FixedIterator

View file

@ -107,7 +107,7 @@ func (h *TemplateRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
type Api struct {
config *config.Config
ts graph.TripleStore
handle *graph.Handle
}
func (api *Api) ApiV1(r *httprouter.Router) {
@ -119,7 +119,7 @@ func (api *Api) ApiV1(r *httprouter.Router) {
r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete))
}
func SetupRoutes(ts graph.TripleStore, cfg *config.Config) {
func SetupRoutes(handle *graph.Handle, cfg *config.Config) {
r := httprouter.New()
assets := findAssetsPath()
if glog.V(2) {
@ -129,7 +129,7 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) {
templates.ParseGlob(fmt.Sprint(assets, "/templates/*.html"))
root := &TemplateRequestHandler{templates: templates}
docs := &DocRequestHandler{assets: assets}
api := &Api{config: cfg, ts: ts}
api := &Api{config: cfg, handle: handle}
api.ApiV1(r)
//m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true}))
@ -141,8 +141,8 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) {
http.Handle("/", r)
}
func Serve(ts graph.TripleStore, cfg *config.Config) {
SetupRoutes(ts, cfg)
func Serve(handle *graph.Handle, cfg *config.Config) {
SetupRoutes(handle, cfg)
glog.Infof("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort)
fmt.Printf("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort)
err := http.ListenAndServe(fmt.Sprintf("%s:%s", cfg.ListenHost, cfg.ListenPort), nil)

View file

@ -71,9 +71,9 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http
var ses query.HttpSession
switch params.ByName("query_lang") {
case "gremlin":
ses = gremlin.NewSession(api.ts, api.config.Timeout, false)
ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false)
case "mql":
ses = mql.NewSession(api.ts)
ses = mql.NewSession(api.handle.QuadStore)
default:
return FormatJson400(w, "Need a query language.")
}
@ -110,18 +110,15 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http
ses = nil
return FormatJsonError(w, 500, "Incomplete data?")
}
http.Error(w, "", http.StatusNotFound)
ses = nil
return http.StatusNotFound
}
func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int {
var ses query.HttpSession
switch params.ByName("query_lang") {
case "gremlin":
ses = gremlin.NewSession(api.ts, api.config.Timeout, false)
ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false)
case "mql":
ses = mql.NewSession(api.ts)
ses = mql.NewSession(api.handle.QuadStore)
default:
return FormatJson400(w, "Need a query language.")
}
@ -146,6 +143,4 @@ func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params http
default:
return FormatJsonError(w, 500, "Incomplete data?")
}
http.Error(w, "", http.StatusNotFound)
return http.StatusNotFound
}

View file

@ -55,7 +55,7 @@ func (api *Api) ServeV1Write(w http.ResponseWriter, r *http.Request, _ httproute
if terr != nil {
return FormatJson400(w, terr)
}
api.ts.AddTripleSet(tripleList)
api.handle.QuadWriter.AddQuadSet(tripleList)
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList))
return 200
}
@ -97,11 +97,11 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
block = append(block, t)
n++
if len(block) == cap(block) {
api.ts.AddTripleSet(block)
api.handle.QuadWriter.AddQuadSet(block)
block = block[:0]
}
}
api.ts.AddTripleSet(block)
api.handle.QuadWriter.AddQuadSet(block)
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n)
@ -122,7 +122,7 @@ func (api *Api) ServeV1Delete(w http.ResponseWriter, r *http.Request, params htt
}
count := 0
for _, triple := range tripleList {
api.ts.RemoveTriple(triple)
api.handle.QuadWriter.RemoveQuad(triple)
count++
}
fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count)

View file

@ -23,6 +23,7 @@ import (
"github.com/google/cayley/quad"
_ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/writer"
)
// This is a simple test graph.
@ -54,8 +55,9 @@ var simpleGraph = []quad.Quad{
func makeTestSession(data []quad.Quad) *Session {
ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, t := range data {
ts.AddTriple(t)
w.AddQuad(t)
}
return NewSession(ts, -1, false)
}

View file

@ -22,6 +22,7 @@ import (
"github.com/google/cayley/graph"
_ "github.com/google/cayley/graph/memstore"
"github.com/google/cayley/quad"
_ "github.com/google/cayley/writer"
)
// This is a simple test graph.
@ -53,8 +54,9 @@ var simpleGraph = []quad.Quad{
func makeTestSession(data []quad.Quad) *Session {
ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, t := range data {
ts.AddTriple(t)
w.AddQuad(t)
}
return NewSession(ts)
}

View file

@ -21,6 +21,7 @@ import (
"github.com/google/cayley/quad"
_ "github.com/google/cayley/graph/memstore"
_ "github.com/google/cayley/writer"
)
func TestBadParse(t *testing.T) {
@ -55,13 +56,14 @@ var testQueries = []struct {
func TestMemstoreBackedSexp(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
it := BuildIteratorTreeForQuery(ts, "()")
if it.Type() != graph.Null {
t.Errorf(`Incorrect type for empty query, got:%q expect: "null"`, it.Type())
}
for _, test := range testQueries {
if test.add.IsValid() {
ts.AddTriple(test.add)
w.AddQuad(test.add)
}
it := BuildIteratorTreeForQuery(ts, test.query)
if it.Type() != test.typ {
@ -79,8 +81,9 @@ func TestMemstoreBackedSexp(t *testing.T) {
func TestTreeConstraintParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil)
ts.AddTriple(quad.Quad{"i", "like", "food", ""})
ts.AddTriple(quad.Quad{"food", "is", "good", ""})
w, _ := graph.NewQuadWriter("single", ts, nil)
w.AddQuad(quad.Quad{"i", "like", "food", ""})
w.AddQuad(quad.Quad{"food", "is", "good", ""})
query := "(\"i\"\n" +
"(:like\n" +
"($a (:is :good))))"
@ -99,8 +102,9 @@ func TestTreeConstraintParse(t *testing.T) {
func TestTreeConstraintTagParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil)
ts.AddTriple(quad.Quad{"i", "like", "food", ""})
ts.AddTriple(quad.Quad{"food", "is", "good", ""})
w, _ := graph.NewQuadWriter("single", ts, nil)
w.AddQuad(quad.Quad{"i", "like", "food", ""})
w.AddQuad(quad.Quad{"food", "is", "good", ""})
query := "(\"i\"\n" +
"(:like\n" +
"($a (:is :good))))"
@ -118,12 +122,13 @@ func TestTreeConstraintTagParse(t *testing.T) {
func TestMultipleConstraintParse(t *testing.T) {
ts, _ := graph.NewTripleStore("memstore", "", nil)
w, _ := graph.NewQuadWriter("single", ts, nil)
for _, tv := range []quad.Quad{
{"i", "like", "food", ""},
{"i", "like", "beer", ""},
{"you", "like", "beer", ""},
} {
ts.AddTriple(tv)
w.AddQuad(tv)
}
query := `(
$a

91
writer/single.go Normal file
View file

@ -0,0 +1,91 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package writer
import (
"sync"
"time"
"github.com/google/cayley/graph"
"github.com/google/cayley/quad"
)
func init() {
graph.RegisterWriter("single", NewSingleReplication)
}
type Single struct {
nextID int64
ts graph.TripleStore
mut sync.Mutex
}
func NewSingleReplication(ts graph.TripleStore, opts graph.Options) (graph.QuadWriter, error) {
horizon := ts.Horizon()
rep := &Single{nextID: horizon + 1, ts: ts}
if horizon <= 0 {
rep.nextID = 1
}
return rep, nil
}
func (s *Single) AcquireNextID() int64 {
s.mut.Lock()
defer s.mut.Unlock()
id := s.nextID
s.nextID++
return id
}
func (s *Single) AddQuad(q quad.Quad) error {
deltas := make([]graph.Delta, 1)
deltas[0] = graph.Delta{
ID: s.AcquireNextID(),
Quad: q,
Action: graph.Add,
Timestamp: time.Now(),
}
return s.ts.ApplyDeltas(deltas)
}
func (s *Single) AddQuadSet(set []quad.Quad) error {
deltas := make([]graph.Delta, len(set))
for i, q := range set {
deltas[i] = graph.Delta{
ID: s.AcquireNextID(),
Quad: q,
Action: graph.Add,
Timestamp: time.Now(),
}
}
s.ts.ApplyDeltas(deltas)
return nil
}
func (s *Single) RemoveQuad(q quad.Quad) error {
deltas := make([]graph.Delta, 1)
deltas[0] = graph.Delta{
ID: s.AcquireNextID(),
Quad: q,
Action: graph.Delete,
Timestamp: time.Now(),
}
return s.ts.ApplyDeltas(deltas)
}
func (s *Single) Close() error {
// Nothing to clean up locally.
return nil
}