diff --git a/.travis.yml b/.travis.yml index efb83a3..08d2ea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ go: install: - go get github.com/badgerodon/peg - go get github.com/barakmich/glog + - go get github.com/cznic/mathutil - go get github.com/julienschmidt/httprouter - go get github.com/petar/GoLLRB/llrb - go get github.com/peterh/liner diff --git a/cayley.go b/cayley.go index c06af2a..d7364b9 100644 --- a/cayley.go +++ b/cayley.go @@ -44,6 +44,9 @@ import ( _ "github.com/google/cayley/graph/leveldb" _ "github.com/google/cayley/graph/memstore" _ "github.com/google/cayley/graph/mongo" + + // Load writer registry + _ "github.com/google/cayley/writer" ) var ( @@ -105,8 +108,8 @@ func main() { } var ( - ts graph.TripleStore - err error + handle *graph.Handle + err error ) switch cmd { case "version": @@ -123,60 +126,60 @@ func main() { break } if *tripleFile != "" { - ts, err = db.Open(cfg) + handle, err = db.Open(cfg) if err != nil { break } - err = load(ts, cfg, *tripleFile, *tripleType) + err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType) if err != nil { break } - ts.Close() + handle.Close() } case "load": - ts, err = db.Open(cfg) + handle, err = db.Open(cfg) if err != nil { break } - err = load(ts, cfg, *tripleFile, *tripleType) + err = load(handle.QuadWriter, cfg, *tripleFile, *tripleType) if err != nil { break } - ts.Close() + handle.Close() case "repl": - ts, err = db.Open(cfg) + handle, err = db.Open(cfg) if err != nil { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = load(ts, cfg, "", *tripleType) + err = load(handle.QuadWriter, cfg, "", *tripleType) if err != nil { break } } - err = db.Repl(ts, *queryLanguage, cfg) + err = db.Repl(handle, *queryLanguage, cfg) - ts.Close() + handle.Close() case "http": - ts, err = db.Open(cfg) + handle, err = db.Open(cfg) if err != nil { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = load(ts, cfg, "", *tripleType) + err = load(handle.QuadWriter, cfg, "", *tripleType) if err != nil { break } } - http.Serve(ts, cfg) + http.Serve(handle, cfg) - ts.Close() + handle.Close() default: fmt.Println("No command", cmd) @@ -187,7 +190,29 @@ func main() { } } -func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { +func load(qw graph.QuadWriter, cfg *config.Config, path, typ string) error { + return decompressAndLoad(qw, cfg, path, typ, db.Load) +} + +func removeAll(qw graph.QuadWriter, cfg *config.Config, path, typ string) error { + return decompressAndLoad(qw, cfg, path, typ, remove) +} + +func remove(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error { + for { + t, err := dec.Unmarshal() + if err != nil { + if err == io.EOF { + break + } + return err + } + qw.RemoveQuad(t) + } + return nil +} + +func decompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string, loadFn func(graph.QuadWriter, *config.Config, quad.Unmarshaler) error) error { var r io.Reader if path == "" { @@ -233,7 +258,7 @@ func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { return fmt.Errorf("unknown quad format %q", typ) } - return db.Load(ts, cfg, dec) + return db.Load(qw, cfg, dec) } const ( diff --git a/cayley_test.go b/cayley_test.go index f7297b8..a92d947 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -297,26 +297,45 @@ var m2_actors = movie2.Save("name","movie2").Follow(filmToActor) ` var ( - once sync.Once - cfg = &config.Config{ - DatabasePath: "30kmoviedata.nq.gz", - DatabaseType: "memstore", - Timeout: 300 * time.Second, + create sync.Once + deleteAndRecreate sync.Once + cfg = &config.Config{ + DatabasePath: "30kmoviedata.nq.gz", + DatabaseType: "memstore", + ReplicationType: "single", + Timeout: 300 * time.Second, } - ts graph.TripleStore + handle *graph.Handle ) func prepare(t testing.TB) { var err error - once.Do(func() { - ts, err = db.Open(cfg) + create.Do(func() { + handle, err = db.Open(cfg) if err != nil { t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) } if !graph.IsPersistent(cfg.DatabaseType) { - err = load(ts, cfg, "", "cquad") + err = load(handle.QuadWriter, cfg, "", "cquad") + if err != nil { + t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) + } + } + }) +} + +func deletePrepare(t testing.TB) { + var err error + deleteAndRecreate.Do(func() { + prepare(t) + if !graph.IsPersistent(cfg.DatabaseType) { + err = removeAll(handle.QuadWriter, cfg, "", "cquad") + if err != nil { + t.Fatalf("Failed to remove %q: %v", cfg.DatabasePath, err) + } + err = load(handle.QuadWriter, cfg, "", "cquad") if err != nil { t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) } @@ -326,11 +345,23 @@ func prepare(t testing.TB) { func TestQueries(t *testing.T) { prepare(t) + checkQueries(t) +} + +func TestDeletedAndRecreatedQueries(t *testing.T) { + if testing.Short() { + t.Skip() + } + deletePrepare(t) + checkQueries(t) +} + +func checkQueries(t *testing.T) { for _, test := range benchmarkQueries { if testing.Short() && test.long { continue } - ses := gremlin.NewSession(ts, cfg.Timeout, true) + ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) _, err := ses.InputParses(test.query) if err != nil { t.Fatalf("Failed to parse benchmark gremlin %s: %v", test.message, err) @@ -382,7 +413,7 @@ func runBench(n int, b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { c := make(chan interface{}, 5) - ses := gremlin.NewSession(ts, cfg.Timeout, true) + ses := gremlin.NewSession(handle.QuadStore, cfg.Timeout, true) // Do the parsing we know works. ses.InputParses(benchmarkQueries[n].query) b.StartTimer() diff --git a/config/config.go b/config/config.go index d8cb4d8..ff06e7c 100644 --- a/config/config.go +++ b/config/config.go @@ -26,25 +26,29 @@ import ( ) type Config struct { - DatabaseType string - DatabasePath string - DatabaseOptions map[string]interface{} - ListenHost string - ListenPort string - ReadOnly bool - Timeout time.Duration - LoadSize int + DatabaseType string + DatabasePath string + DatabaseOptions map[string]interface{} + ReplicationType string + ReplicationOptions map[string]interface{} + ListenHost string + ListenPort string + ReadOnly bool + Timeout time.Duration + LoadSize int } type config struct { - DatabaseType string `json:"database"` - DatabasePath string `json:"db_path"` - DatabaseOptions map[string]interface{} `json:"db_options"` - ListenHost string `json:"listen_host"` - ListenPort string `json:"listen_port"` - ReadOnly bool `json:"read_only"` - Timeout duration `json:"timeout"` - LoadSize int `json:"load_size"` + DatabaseType string `json:"database"` + DatabasePath string `json:"db_path"` + DatabaseOptions map[string]interface{} `json:"db_options"` + ReplicationType string `json:"replication"` + ReplicationOptions map[string]interface{} `json:"replication_options"` + ListenHost string `json:"listen_host"` + ListenPort string `json:"listen_port"` + ReadOnly bool `json:"read_only"` + Timeout duration `json:"timeout"` + LoadSize int `json:"load_size"` } func (c *Config) UnmarshalJSON(data []byte) error { @@ -54,28 +58,32 @@ func (c *Config) UnmarshalJSON(data []byte) error { return err } *c = Config{ - DatabaseType: t.DatabaseType, - DatabasePath: t.DatabasePath, - DatabaseOptions: t.DatabaseOptions, - ListenHost: t.ListenHost, - ListenPort: t.ListenPort, - ReadOnly: t.ReadOnly, - Timeout: time.Duration(t.Timeout), - LoadSize: t.LoadSize, + DatabaseType: t.DatabaseType, + DatabasePath: t.DatabasePath, + DatabaseOptions: t.DatabaseOptions, + ReplicationType: t.ReplicationType, + ReplicationOptions: t.ReplicationOptions, + ListenHost: t.ListenHost, + ListenPort: t.ListenPort, + ReadOnly: t.ReadOnly, + Timeout: time.Duration(t.Timeout), + LoadSize: t.LoadSize, } return nil } func (c *Config) MarshalJSON() ([]byte, error) { return json.Marshal(config{ - DatabaseType: c.DatabaseType, - DatabasePath: c.DatabasePath, - DatabaseOptions: c.DatabaseOptions, - ListenHost: c.ListenHost, - ListenPort: c.ListenPort, - ReadOnly: c.ReadOnly, - Timeout: duration(c.Timeout), - LoadSize: c.LoadSize, + DatabaseType: c.DatabaseType, + DatabasePath: c.DatabasePath, + DatabaseOptions: c.DatabaseOptions, + ReplicationType: c.ReplicationType, + ReplicationOptions: c.ReplicationOptions, + ListenHost: c.ListenHost, + ListenPort: c.ListenPort, + ReadOnly: c.ReadOnly, + Timeout: duration(c.Timeout), + LoadSize: c.LoadSize, }) } @@ -115,13 +123,14 @@ func (d *duration) MarshalJSON() ([]byte, error) { } var ( - databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") - databaseBackend = flag.String("db", "memstore", "Database Backend.") - host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).") - loadSize = flag.Int("load_size", 10000, "Size of triplesets to load") - port = flag.String("port", "64210", "Port to listen on.") - readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.") - timeout = flag.Duration("timeout", 30*time.Second, "Elapsed time until an individual query times out.") + databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") + databaseBackend = flag.String("db", "memstore", "Database Backend.") + replicationBackend = flag.String("replication", "single", "Replication method.") + host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).") + loadSize = flag.Int("load_size", 10000, "Size of triplesets to load") + port = flag.String("port", "64210", "Port to listen on.") + readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.") + timeout = flag.Duration("timeout", 30*time.Second, "Elapsed time until an individual query times out.") ) func ParseConfigFromFile(filename string) *Config { @@ -175,6 +184,10 @@ func ParseConfigFromFlagsAndFile(fileFlag string) *Config { config.DatabaseType = *databaseBackend } + if config.ReplicationType == "" { + config.ReplicationType = *replicationBackend + } + if config.ListenHost == "" { config.ListenHost = *host } diff --git a/db/db.go b/db/db.go index 8ea30db..e5370ee 100644 --- a/db/db.go +++ b/db/db.go @@ -36,8 +36,20 @@ func Init(cfg *config.Config) error { return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) } -func Open(cfg *config.Config) (graph.TripleStore, error) { - glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) +func Open(cfg *config.Config) (*graph.Handle, error) { + qs, err := OpenQuadStore(cfg) + if err != nil { + return nil, err + } + qw, err := OpenQuadWriter(qs, cfg) + if err != nil { + return nil, err + } + return &graph.Handle{QuadStore: qs, QuadWriter: qw}, nil +} + +func OpenQuadStore(cfg *config.Config) (graph.TripleStore, error) { + glog.Infof("Opening quad store %q at %s", cfg.DatabaseType, cfg.DatabasePath) ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) if err != nil { return nil, err @@ -46,19 +58,17 @@ func Open(cfg *config.Config) (graph.TripleStore, error) { return ts, nil } -func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { - bulker, canBulk := ts.(graph.BulkLoader) - if canBulk { - switch err := bulker.BulkLoad(dec); err { - case nil: - return nil - case graph.ErrCannotBulkLoad: - // Try individual loading. - default: - return err - } +func OpenQuadWriter(qs graph.TripleStore, cfg *config.Config) (graph.QuadWriter, error) { + glog.Infof("Opening replication method %q", cfg.ReplicationType) + w, err := graph.NewQuadWriter(cfg.ReplicationType, qs, cfg.ReplicationOptions) + if err != nil { + return nil, err } + return w, nil +} + +func Load(qw graph.QuadWriter, cfg *config.Config, dec quad.Unmarshaler) error { block := make([]quad.Quad, 0, cfg.LoadSize) for { t, err := dec.Unmarshal() @@ -70,11 +80,11 @@ func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error } block = append(block, t) if len(block) == cap(block) { - ts.AddTripleSet(block) + qw.AddQuadSet(block) block = block[:0] } } - ts.AddTripleSet(block) + qw.AddQuadSet(block) return nil } diff --git a/db/repl.go b/db/repl.go index 11ecfea..ac36ea4 100644 --- a/db/repl.go +++ b/db/repl.go @@ -70,17 +70,17 @@ const ( history = ".cayley_history" ) -func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error { +func Repl(h *graph.Handle, queryLanguage string, cfg *config.Config) error { var ses query.Session switch queryLanguage { case "sexp": - ses = sexp.NewSession(ts) + ses = sexp.NewSession(h.QuadStore) case "mql": - ses = mql.NewSession(ts) + ses = mql.NewSession(h.QuadStore) case "gremlin": fallthrough default: - ses = gremlin.NewSession(ts, cfg.Timeout, true) + ses = gremlin.NewSession(h.QuadStore, cfg.Timeout, true) } term, err := terminal(history) @@ -124,25 +124,25 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error continue case strings.HasPrefix(line, ":a"): - triple, err := cquads.Parse(line[3:]) - if !triple.IsValid() { + quad, err := cquads.Parse(line[3:]) + if !quad.IsValid() { if err != nil { - fmt.Printf("not a valid triple: %v\n", err) + fmt.Printf("not a valid quad: %v\n", err) } continue } - ts.AddTriple(triple) + h.QuadWriter.AddQuad(quad) continue case strings.HasPrefix(line, ":d"): - triple, err := cquads.Parse(line[3:]) - if !triple.IsValid() { + quad, err := cquads.Parse(line[3:]) + if !quad.IsValid() { if err != nil { - fmt.Printf("not a valid triple: %v\n", err) + fmt.Printf("not a valid quad: %v\n", err) } continue } - ts.RemoveTriple(triple) + h.QuadWriter.RemoveQuad(quad) continue } } diff --git a/graph/iterator/mock_ts_test.go b/graph/iterator/mock_ts_test.go index 08483c7..4b186ac 100644 --- a/graph/iterator/mock_ts_test.go +++ b/graph/iterator/mock_ts_test.go @@ -36,9 +36,7 @@ func (qs *store) ValueOf(s string) graph.Value { return nil } -func (qs *store) AddTriple(quad.Quad) {} - -func (qs *store) AddTripleSet([]quad.Quad) {} +func (qs *store) ApplyDeltas([]graph.Delta) error { return nil } func (qs *store) Quad(graph.Value) quad.Quad { return quad.Quad{} } @@ -60,6 +58,8 @@ func (qs *store) NameOf(v graph.Value) string { func (qs *store) Size() int64 { return 0 } +func (qs *store) Horizon() int64 { return 0 } + func (qs *store) DebugPrint() {} func (qs *store) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index f87af3b..a23d15d 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -16,9 +16,11 @@ package leveldb import ( "bytes" + "encoding/json" "fmt" "strings" + "github.com/barakmich/glog" ldbit "github.com/syndtr/goleveldb/leveldb/iterator" "github.com/syndtr/goleveldb/leveldb/opt" @@ -65,10 +67,9 @@ func NewIterator(prefix string, d quad.Direction, value graph.Value, qs *TripleS ok := it.iter.Seek(it.nextPrefix) if !ok { - // FIXME(kortschak) What are the semantics here? Is this iterator usable? - // If not, we should return nil *Iterator and an error. it.open = false it.iter.Release() + glog.Error("Opening LevelDB iterator couldn't seek to location ", it.nextPrefix) } return &it @@ -117,6 +118,12 @@ func (it *Iterator) Close() { } } +func (it *Iterator) isLiveValue(val []byte) bool { + var entry IndexEntry + json.Unmarshal(val, &entry) + return len(entry.History)%2 != 0 +} + func (it *Iterator) Next() bool { if it.iter == nil { it.result = nil @@ -132,6 +139,9 @@ func (it *Iterator) Next() bool { return false } if bytes.HasPrefix(it.iter.Key(), it.nextPrefix) { + if !it.isLiveValue(it.iter.Value()) { + return it.Next() + } out := make([]byte, len(it.iter.Key())) copy(out, it.iter.Key()) it.result = Token(out) @@ -173,7 +183,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { case quad.Object: return 2*hashSize + 2 case quad.Label: - return -1 + return 3*hashSize + 2 } } if bytes.Equal(prefix, []byte("po")) { @@ -185,7 +195,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { case quad.Object: return hashSize + 2 case quad.Label: - return -1 + return hashSize + 2 } } if bytes.Equal(prefix, []byte("os")) { @@ -197,7 +207,7 @@ func PositionOf(prefix []byte, d quad.Direction, qs *TripleStore) int { case quad.Object: return 2 case quad.Label: - return -1 + return 3*hashSize + 2 } } if bytes.Equal(prefix, []byte("cp")) { @@ -221,16 +231,17 @@ func (it *Iterator) Contains(v graph.Value) bool { return false } offset := PositionOf(val[0:2], it.dir, it.qs) - if offset != -1 { - if bytes.HasPrefix(val[offset:], it.checkId[1:]) { - return true - } - } else { - nameForDir := it.qs.Quad(v).Get(it.dir) - hashForDir := it.qs.ValueOf(nameForDir).(Token) - if bytes.Equal(hashForDir, it.checkId) { - return true - } + if bytes.HasPrefix(val[offset:], it.checkId[1:]) { + // You may ask, why don't we check to see if it's a valid (not deleted) triple + // again? + // + // We've already done that -- in order to get the graph.Value token in the + // first place, we had to have done the check already; it came from a Next(). + // + // However, if it ever starts coming from somewhere else, it'll be more + // efficient to change the interface of the graph.Value for LevelDB to a + // struct with a flag for isValid, to save another random read. + return true } return false } diff --git a/graph/leveldb/leveldb_test.go b/graph/leveldb/leveldb_test.go index 47fd91c..45a7404 100644 --- a/graph/leveldb/leveldb_test.go +++ b/graph/leveldb/leveldb_test.go @@ -24,6 +24,7 @@ import ( "github.com/google/cayley/graph" "github.com/google/cayley/graph/iterator" "github.com/google/cayley/quad" + "github.com/google/cayley/writer" ) func makeTripleSet() []quad.Quad { @@ -135,7 +136,8 @@ func TestLoadDatabase(t *testing.T) { t.Error("Failed to create leveldb TripleStore.") } - qs.AddTriple(quad.Quad{"Something", "points_to", "Something Else", "context"}) + w, _ := writer.NewSingleReplication(qs, nil) + w.AddQuad(quad.Quad{"Something", "points_to", "Something Else", "context"}) for _, pq := range []string{"Something", "points_to", "Something Else", "context"} { if got := qs.NameOf(qs.ValueOf(pq)); got != pq { t.Errorf("Failed to roundtrip %q, got:%q expect:%q", pq, got, pq) @@ -154,13 +156,14 @@ func TestLoadDatabase(t *testing.T) { if qs == nil || err != nil { t.Error("Failed to create leveldb TripleStore.") } + w, _ = writer.NewSingleReplication(qs, nil) ts2, didConvert := qs.(*TripleStore) if !didConvert { t.Errorf("Could not convert from generic to LevelDB TripleStore") } - qs.AddTripleSet(makeTripleSet()) + w.AddQuadSet(makeTripleSet()) if s := qs.Size(); s != 11 { t.Errorf("Unexpected triplestore size, got:%d expect:11", s) } @@ -168,7 +171,7 @@ func TestLoadDatabase(t *testing.T) { t.Errorf("Unexpected triplestore size, got:%d expect:5", s) } - qs.RemoveTriple(quad.Quad{"A", "follows", "B", ""}) + w.RemoveQuad(quad.Quad{"A", "follows", "B", ""}) if s := qs.Size(); s != 10 { t.Errorf("Unexpected triplestore size after RemoveTriple, got:%d expect:10", s) } @@ -196,7 +199,9 @@ func TestIterator(t *testing.T) { if qs == nil || err != nil { t.Error("Failed to create leveldb TripleStore.") } - qs.AddTripleSet(makeTripleSet()) + + w, _ := writer.NewSingleReplication(qs, nil) + w.AddQuadSet(makeTripleSet()) var it graph.Iterator it = qs.NodesAllIterator() @@ -291,7 +296,8 @@ func TestSetIterator(t *testing.T) { } defer qs.Close() - qs.AddTripleSet(makeTripleSet()) + w, _ := writer.NewSingleReplication(qs, nil) + w.AddQuadSet(makeTripleSet()) expect := []quad.Quad{ {"C", "follows", "B", ""}, @@ -403,7 +409,9 @@ func TestOptimize(t *testing.T) { if qs == nil || err != nil { t.Error("Failed to create leveldb TripleStore.") } - qs.AddTripleSet(makeTripleSet()) + + w, _ := writer.NewSingleReplication(qs, nil) + w.AddQuadSet(makeTripleSet()) // With an linksto-fixed pair fixed := qs.FixedIterator() diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index e18e683..dee63f8 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -19,6 +19,7 @@ import ( "crypto/sha1" "encoding/binary" "encoding/json" + "errors" "fmt" "hash" "sync" @@ -62,6 +63,7 @@ type TripleStore struct { path string open bool size int64 + horizon int64 writeopts *opt.WriteOptions readopts *opt.ReadOptions } @@ -85,6 +87,7 @@ func createNewLevelDB(path string, _ graph.Options) error { func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) { var qs TripleStore + var err error qs.path = path cache_size := DefaultCacheSize if val, ok := options.IntKey("cache_size_mb"); ok { @@ -106,11 +109,15 @@ func newTripleStore(path string, options graph.Options) (graph.TripleStore, erro qs.readopts = &opt.ReadOptions{} db, err := leveldb.OpenFile(qs.path, qs.dbOpts) if err != nil { - panic("Error, couldn't open! " + err.Error()) + glog.Errorln("Error, couldn't open! ", err) + return nil, err } qs.db = db glog.Infoln(qs.GetStats()) - qs.getSize() + err = qs.getMetadata() + if err != nil { + return nil, err + } return &qs, nil } @@ -128,24 +135,25 @@ func (qs *TripleStore) Size() int64 { return qs.size } -func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { +func (qs *TripleStore) Horizon() int64 { + return qs.horizon +} + +func (qa *TripleStore) createDeltaKeyFor(d graph.Delta) []byte { + key := make([]byte, 0, 19) + key = append(key, 'd') + key = append(key, []byte(fmt.Sprintf("%018x", d.ID))...) + return key +} + +func (qs *TripleStore) createKeyFor(d [4]quad.Direction, triple quad.Quad) []byte { key := make([]byte, 0, 2+(hashSize*3)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) - return key -} - -func (qs *TripleStore) createProvKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { - key := make([]byte, 0, 2+(hashSize*4)) - // TODO(kortschak) Remove dependence on String() method. - key = append(key, []byte{quad.Label.Prefix(), d[0].Prefix()}...) - key = append(key, qs.convertStringToByteHash(triple.Get(quad.Label))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) - key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) + key = append(key, qs.convertStringToByteHash(triple.Get(d[3]))...) return key } @@ -156,76 +164,98 @@ func (qs *TripleStore) createValueKeyFor(s string) []byte { return key } -func (qs *TripleStore) AddTriple(t quad.Quad) { - batch := &leveldb.Batch{} - qs.buildWrite(batch, t) - err := qs.db.Write(batch, qs.writeopts) - if err != nil { - glog.Errorf("Couldn't write to DB for triple %s.", t) - return - } - qs.size++ +type IndexEntry struct { + quad.Quad + History []int64 } // Short hand for direction permutations. var ( - spo = [3]quad.Direction{quad.Subject, quad.Predicate, quad.Object} - osp = [3]quad.Direction{quad.Object, quad.Subject, quad.Predicate} - pos = [3]quad.Direction{quad.Predicate, quad.Object, quad.Subject} - pso = [3]quad.Direction{quad.Predicate, quad.Subject, quad.Object} + spo = [4]quad.Direction{quad.Subject, quad.Predicate, quad.Object, quad.Label} + osp = [4]quad.Direction{quad.Object, quad.Subject, quad.Predicate, quad.Label} + pos = [4]quad.Direction{quad.Predicate, quad.Object, quad.Subject, quad.Label} + cps = [4]quad.Direction{quad.Label, quad.Predicate, quad.Subject, quad.Object} ) -func (qs *TripleStore) RemoveTriple(t quad.Quad) { - _, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts) - if err != nil && err != leveldb.ErrNotFound { - glog.Error("Couldn't access DB to confirm deletion") - return - } - if err == leveldb.ErrNotFound { - // No such triple in the database, forget about it. - return - } +func (qs *TripleStore) ApplyDeltas(deltas []graph.Delta) error { batch := &leveldb.Batch{} - batch.Delete(qs.createKeyFor(spo, t)) - batch.Delete(qs.createKeyFor(osp, t)) - batch.Delete(qs.createKeyFor(pos, t)) - qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch) - qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch) - qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch) - if t.Get(quad.Label) != "" { - batch.Delete(qs.createProvKeyFor(pso, t)) - qs.UpdateValueKeyBy(t.Get(quad.Label), -1, batch) + resizeMap := make(map[string]int64) + size_change := int64(0) + for _, d := range deltas { + bytes, err := json.Marshal(d) + if err != nil { + return err + } + batch.Put(qs.createDeltaKeyFor(d), bytes) + err = qs.buildQuadWrite(batch, d.Quad, d.ID, d.Action == graph.Add) + if err != nil { + return err + } + delta := int64(1) + if d.Action == graph.Delete { + delta = int64(-1) + } + resizeMap[d.Quad.Subject] += delta + resizeMap[d.Quad.Predicate] += delta + resizeMap[d.Quad.Object] += delta + if d.Quad.Label != "" { + resizeMap[d.Quad.Label] += delta + } + size_change += delta + qs.horizon = d.ID } - err = qs.db.Write(batch, nil) + for k, v := range resizeMap { + if v != 0 { + err := qs.UpdateValueKeyBy(k, v, batch) + if err != nil { + return err + } + } + } + err := qs.db.Write(batch, qs.writeopts) if err != nil { - glog.Errorf("Couldn't delete triple %s.", t) - return + glog.Error("Couldn't write to DB for tripleset.") + return err } - qs.size-- + qs.size += size_change + return nil } -func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) { - bytes, err := json.Marshal(t) - if err != nil { - glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err) - return +func (qs *TripleStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error { + var entry IndexEntry + data, err := qs.db.Get(qs.createKeyFor(spo, q), qs.readopts) + if err != nil && err != leveldb.ErrNotFound { + glog.Error("Couldn't access DB to prepare index: ", err) + return err } - batch.Put(qs.createKeyFor(spo, t), bytes) - batch.Put(qs.createKeyFor(osp, t), bytes) - batch.Put(qs.createKeyFor(pos, t), bytes) - if t.Get(quad.Label) != "" { - batch.Put(qs.createProvKeyFor(pso, t), bytes) + if err == nil { + // We got something. + err = json.Unmarshal(data, &entry) + if err != nil { + return err + } + } else { + entry.Quad = q } -} + entry.History = append(entry.History, id) -func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) { - qs.buildTripleWrite(batch, t) - qs.UpdateValueKeyBy(t.Get(quad.Subject), 1, nil) - qs.UpdateValueKeyBy(t.Get(quad.Predicate), 1, nil) - qs.UpdateValueKeyBy(t.Get(quad.Object), 1, nil) - if t.Get(quad.Label) != "" { - qs.UpdateValueKeyBy(t.Get(quad.Label), 1, nil) + if isAdd && len(entry.History)%2 == 0 { + glog.Error("Entry History is out of sync for", entry) + return errors.New("Odd index history") } + + bytes, err := json.Marshal(entry) + if err != nil { + glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err) + return err + } + batch.Put(qs.createKeyFor(spo, q), bytes) + batch.Put(qs.createKeyFor(osp, q), bytes) + batch.Put(qs.createKeyFor(pos, q), bytes) + if q.Get(quad.Label) != "" { + batch.Put(qs.createKeyFor(cps, q), bytes) + } + return nil } type ValueData struct { @@ -233,15 +263,15 @@ type ValueData struct { Size int64 } -func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.Batch) { - value := &ValueData{name, int64(amount)} +func (qs *TripleStore) UpdateValueKeyBy(name string, amount int64, batch *leveldb.Batch) error { + value := &ValueData{name, amount} key := qs.createValueKeyFor(name) b, err := qs.db.Get(key, qs.readopts) // Error getting the node from the database. if err != nil && err != leveldb.ErrNotFound { glog.Errorf("Error reading Value %s from the DB.", name) - return + return err } // Node exists in the database -- unmarshal and update. @@ -249,58 +279,28 @@ func (qs *TripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb. err = json.Unmarshal(b, value) if err != nil { glog.Errorf("Error: couldn't reconstruct value: %v", err) - return + return err } - value.Size += int64(amount) + value.Size += amount } // Are we deleting something? - if amount < 0 { - if value.Size <= 0 { - if batch == nil { - qs.db.Delete(key, qs.writeopts) - } else { - batch.Delete(key) - } - return - } + if value.Size <= 0 { + value.Size = 0 } // Repackage and rewrite. bytes, err := json.Marshal(&value) if err != nil { glog.Errorf("Couldn't write to buffer for value %s: %s", name, err) - return + return err } if batch == nil { qs.db.Put(key, bytes, qs.writeopts) } else { batch.Put(key, bytes) } -} - -func (qs *TripleStore) AddTripleSet(t_s []quad.Quad) { - batch := &leveldb.Batch{} - newTs := len(t_s) - resizeMap := make(map[string]int) - for _, t := range t_s { - qs.buildTripleWrite(batch, t) - resizeMap[t.Subject]++ - resizeMap[t.Predicate]++ - resizeMap[t.Object]++ - if t.Label != "" { - resizeMap[t.Label]++ - } - } - for k, v := range resizeMap { - qs.UpdateValueKeyBy(k, v, batch) - } - err := qs.db.Write(batch, qs.writeopts) - if err != nil { - glog.Error("Couldn't write to DB for tripleset.") - return - } - qs.size += int64(newTs) + return nil } func (qs *TripleStore) Close() { @@ -314,6 +314,16 @@ func (qs *TripleStore) Close() { } else { glog.Errorf("Couldn't convert size before closing!") } + buf.Reset() + err = binary.Write(buf, binary.LittleEndian, qs.horizon) + if err == nil { + werr := qs.db.Put([]byte("__horizon"), buf.Bytes(), qs.writeopts) + if werr != nil { + glog.Error("Couldn't write horizon before closing!") + } + } else { + glog.Errorf("Couldn't convert horizon before closing!") + } qs.db.Close() qs.open = false } @@ -386,23 +396,34 @@ func (qs *TripleStore) SizeOf(k graph.Value) int64 { return int64(qs.valueData(k.(Token)).Size) } -func (qs *TripleStore) getSize() { - var size int64 - b, err := qs.db.Get([]byte("__size"), qs.readopts) +func (qs *TripleStore) getInt64ForKey(key string, empty int64) (int64, error) { + var out int64 + b, err := qs.db.Get([]byte(key), qs.readopts) if err != nil && err != leveldb.ErrNotFound { - panic("Couldn't read size " + err.Error()) + glog.Errorln("Couldn't read " + key + ": " + err.Error()) + return 0, err } if err == leveldb.ErrNotFound { // Must be a new database. Cool - qs.size = 0 - return + return empty, nil } buf := bytes.NewBuffer(b) - err = binary.Read(buf, binary.LittleEndian, &size) + err = binary.Read(buf, binary.LittleEndian, &out) if err != nil { - glog.Errorln("Error: couldn't parse size") + glog.Errorln("Error: couldn't parse", key) + return 0, err } - qs.size = size + return out, nil +} + +func (qs *TripleStore) getMetadata() error { + var err error + qs.size, err = qs.getInt64ForKey("__size", 0) + if err != nil { + return err + } + qs.horizon, err = qs.getInt64ForKey("__horizon", 0) + return err } func (qs *TripleStore) SizeOfPrefix(pre []byte) (int64, error) { diff --git a/graph/memstore/all_iterator.go b/graph/memstore/all_iterator.go index 658254e..6717a13 100644 --- a/graph/memstore/all_iterator.go +++ b/graph/memstore/all_iterator.go @@ -24,19 +24,22 @@ type AllIterator struct { ts *TripleStore } -func NewMemstoreAllIterator(ts *TripleStore) *AllIterator { - var out AllIterator +type NodesAllIterator AllIterator +type QuadsAllIterator AllIterator + +func NewMemstoreNodesAllIterator(ts *TripleStore) *NodesAllIterator { + var out NodesAllIterator out.Int64 = *iterator.NewInt64(1, ts.idCounter-1) out.ts = ts return &out } // No subiterators. -func (it *AllIterator) SubIterators() []graph.Iterator { +func (it *NodesAllIterator) SubIterators() []graph.Iterator { return nil } -func (it *AllIterator) Next() bool { +func (it *NodesAllIterator) Next() bool { if !it.Int64.Next() { return false } @@ -46,3 +49,21 @@ func (it *AllIterator) Next() bool { } return true } + +func NewMemstoreQuadsAllIterator(ts *TripleStore) *QuadsAllIterator { + var out QuadsAllIterator + out.Int64 = *iterator.NewInt64(1, ts.quadIdCounter-1) + out.ts = ts + return &out +} + +func (qit *QuadsAllIterator) Next() bool { + out := qit.Int64.Next() + if out { + i64 := qit.Int64.Result().(int64) + if qit.ts.log[i64].DeletedBy != 0 || qit.ts.log[i64].Action == graph.Delete { + return qit.Next() + } + } + return out +} diff --git a/graph/memstore/b/keys.go b/graph/memstore/b/keys.go new file mode 100644 index 0000000..4593298 --- /dev/null +++ b/graph/memstore/b/keys.go @@ -0,0 +1,972 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package b implements a B+tree. +// +// Changelog +// +// 2014-06-26: Lower GC presure by recycling things. +// +// 2014-04-18: Added new method Put. +// +// Generic types +// +// Keys and their associated values are interface{} typed, similar to all of +// the containers in the standard library. +// +// Semiautomatic production of a type specific variant of this package is +// supported via +// +// $ make generic +// +// This command will write to stdout a version of the btree.go file where +// every key type occurrence is replaced by the word 'key' (written in all +// CAPS) and every value type occurrence is replaced by the word 'value' +// (written in all CAPS). Then you have to replace these tokens with your +// desired type(s), using any technique you're comfortable with. +// +// This is how, for example, 'example/int.go' was created: +// +// $ mkdir example +// $ +// $ # Note: the command bellow must be actually written using the words +// $ # 'key' and 'value' in all CAPS. The proper form is avoided in this +// $ # documentation to not confuse any text replacement mechanism. +// $ +// $ make generic | sed -e 's/key/int/g' -e 's/value/int/g' > example/int.go +// +// No other changes to int.go are necessary, it compiles just fine. +// +// Running the benchmarks for 1000 keys on a machine with Intel i5-4670 CPU @ +// 3.4GHz, Go release 1.3. +// +// $ go test -bench 1e3 example/all_test.go example/int.go +// PASS +// BenchmarkSetSeq1e3 10000 146740 ns/op +// BenchmarkGetSeq1e3 10000 108261 ns/op +// BenchmarkSetRnd1e3 10000 254359 ns/op +// BenchmarkGetRnd1e3 10000 134621 ns/op +// BenchmarkDelRnd1e3 10000 211864 ns/op +// BenchmarkSeekSeq1e3 10000 148628 ns/op +// BenchmarkSeekRnd1e3 10000 215166 ns/op +// BenchmarkNext1e3 200000 9211 ns/op +// BenchmarkPrev1e3 200000 8843 ns/op +// ok command-line-arguments 25.071s +// $ +package b + +import ( + "fmt" + "io" + "sync" +) + +const ( + kx = 32 //TODO benchmark tune this number if using custom key/value type(s). + kd = 32 //TODO benchmark tune this number if using custom key/value type(s). +) + +func init() { + if kd < 1 { + panic(fmt.Errorf("kd %d: out of range", kd)) + } + + if kx < 2 { + panic(fmt.Errorf("kx %d: out of range", kx)) + } +} + +var ( + btDPool = sync.Pool{New: func() interface{} { return &d{} }} + btEPool = btEpool{sync.Pool{New: func() interface{} { return &Enumerator{} }}} + btTPool = btTpool{sync.Pool{New: func() interface{} { return &Tree{} }}} + btXPool = sync.Pool{New: func() interface{} { return &x{} }} +) + +type btTpool struct{ sync.Pool } + +func (p *btTpool) get(cmp Cmp) *Tree { + x := p.Get().(*Tree) + x.cmp = cmp + return x +} + +type btEpool struct{ sync.Pool } + +func (p *btEpool) get(err error, hit bool, i int, k int64, q *d, t *Tree, ver int64) *Enumerator { + x := p.Get().(*Enumerator) + x.err, x.hit, x.i, x.k, x.q, x.t, x.ver = err, hit, i, k, q, t, ver + return x +} + +type ( + // Cmp compares a and b. Return value is: + // + // < 0 if a < b + // 0 if a == b + // > 0 if a > b + // + Cmp func(a, b int64) int + + d struct { // data page + c int + d [2*kd + 1]de + n *d + p *d + } + + de struct { // d element + k int64 + v struct{} + } + + // Enumerator captures the state of enumerating a tree. It is returned + // from the Seek* methods. The enumerator is aware of any mutations + // made to the tree in the process of enumerating it and automatically + // resumes the enumeration at the proper key, if possible. + // + // However, once an Enumerator returns io.EOF to signal "no more + // items", it does no more attempt to "resync" on tree mutation(s). In + // other words, io.EOF from an Enumaretor is "sticky" (idempotent). + Enumerator struct { + err error + hit bool + i int + k int64 + q *d + t *Tree + ver int64 + } + + // Tree is a B+tree. + Tree struct { + c int + cmp Cmp + first *d + last *d + r interface{} + ver int64 + } + + xe struct { // x element + ch interface{} + k int64 + } + + x struct { // index page + c int + x [2*kx + 2]xe + } +) + +var ( // R/O zero values + zd d + zde de + ze Enumerator + zk int64 + zt Tree + zx x + zxe xe +) + +func clr(q interface{}) { + switch x := q.(type) { + case *x: + for i := 0; i <= x.c; i++ { // Ch0 Sep0 ... Chn-1 Sepn-1 Chn + clr(x.x[i].ch) + } + *x = zx + btXPool.Put(x) + case *d: + *x = zd + btDPool.Put(x) + } +} + +// -------------------------------------------------------------------------- x + +func newX(ch0 interface{}) *x { + r := btXPool.Get().(*x) + r.x[0].ch = ch0 + return r +} + +func (q *x) extract(i int) { + q.c-- + if i < q.c { + copy(q.x[i:], q.x[i+1:q.c+1]) + q.x[q.c].ch = q.x[q.c+1].ch + q.x[q.c].k = zk // GC + q.x[q.c+1] = zxe // GC + } +} + +func (q *x) insert(i int, k int64, ch interface{}) *x { + c := q.c + if i < c { + q.x[c+1].ch = q.x[c].ch + copy(q.x[i+2:], q.x[i+1:c]) + q.x[i+1].k = q.x[i].k + } + c++ + q.c = c + q.x[i].k = k + q.x[i+1].ch = ch + return q +} + +func (q *x) siblings(i int) (l, r *d) { + if i >= 0 { + if i > 0 { + l = q.x[i-1].ch.(*d) + } + if i < q.c { + r = q.x[i+1].ch.(*d) + } + } + return +} + +// -------------------------------------------------------------------------- d + +func (l *d) mvL(r *d, c int) { + copy(l.d[l.c:], r.d[:c]) + copy(r.d[:], r.d[c:r.c]) + l.c += c + r.c -= c +} + +func (l *d) mvR(r *d, c int) { + copy(r.d[c:], r.d[:r.c]) + copy(r.d[:c], l.d[l.c-c:]) + r.c += c + l.c -= c +} + +// ----------------------------------------------------------------------- Tree + +// TreeNew returns a newly created, empty Tree. The compare function is used +// for key collation. +func TreeNew(cmp Cmp) *Tree { + return btTPool.get(cmp) +} + +// Clear removes all K/V pairs from the tree. +func (t *Tree) Clear() { + if t.r == nil { + return + } + + clr(t.r) + t.c, t.first, t.last, t.r = 0, nil, nil, nil + t.ver++ +} + +// Close performs Clear and recycles t to a pool for possible later reuse. No +// references to t should exist or such references must not be used afterwards. +func (t *Tree) Close() { + t.Clear() + *t = zt + btTPool.Put(t) +} + +func (t *Tree) cat(p *x, q, r *d, pi int) { + t.ver++ + q.mvL(r, r.c) + if r.n != nil { + r.n.p = q + } else { + t.last = q + } + q.n = r.n + *r = zd + btDPool.Put(r) + if p.c > 1 { + p.extract(pi) + p.x[pi].ch = q + } else { + switch x := t.r.(type) { + case *x: + *x = zx + btXPool.Put(x) + case *d: + *x = zd + btDPool.Put(x) + } + t.r = q + } +} + +func (t *Tree) catX(p, q, r *x, pi int) { + t.ver++ + q.x[q.c].k = p.x[pi].k + copy(q.x[q.c+1:], r.x[:r.c]) + q.c += r.c + 1 + q.x[q.c].ch = r.x[r.c].ch + *r = zx + btXPool.Put(r) + if p.c > 1 { + p.c-- + pc := p.c + if pi < pc { + p.x[pi].k = p.x[pi+1].k + copy(p.x[pi+1:], p.x[pi+2:pc+1]) + p.x[pc].ch = p.x[pc+1].ch + p.x[pc].k = zk // GC + p.x[pc+1].ch = nil // GC + } + return + } + + switch x := t.r.(type) { + case *x: + *x = zx + btXPool.Put(x) + case *d: + *x = zd + btDPool.Put(x) + } + t.r = q +} + +// Delete removes the k's KV pair, if it exists, in which case Delete returns +// true. +func (t *Tree) Delete(k int64) (ok bool) { + pi := -1 + var p *x + q := t.r + if q == nil { + return false + } + + for { + var i int + i, ok = t.find(q, k) + if ok { + switch x := q.(type) { + case *x: + if x.c < kx && q != t.r { + x, i = t.underflowX(p, x, pi, i) + } + pi = i + 1 + p = x + q = x.x[pi].ch + ok = false + continue + case *d: + t.extract(x, i) + if x.c >= kd { + return true + } + + if q != t.r { + t.underflow(p, x, pi) + } else if t.c == 0 { + t.Clear() + } + return true + } + } + + switch x := q.(type) { + case *x: + if x.c < kx && q != t.r { + x, i = t.underflowX(p, x, pi, i) + } + pi = i + p = x + q = x.x[i].ch + case *d: + return false + } + } +} + +func (t *Tree) extract(q *d, i int) { // (r struct{}) { + t.ver++ + //r = q.d[i].v // prepared for Extract + q.c-- + if i < q.c { + copy(q.d[i:], q.d[i+1:q.c+1]) + } + q.d[q.c] = zde // GC + t.c-- + return +} + +func (t *Tree) find(q interface{}, k int64) (i int, ok bool) { + var mk int64 + l := 0 + switch x := q.(type) { + case *x: + h := x.c - 1 + for l <= h { + m := (l + h) >> 1 + mk = x.x[m].k + switch cmp := t.cmp(k, mk); { + case cmp > 0: + l = m + 1 + case cmp == 0: + return m, true + default: + h = m - 1 + } + } + case *d: + h := x.c - 1 + for l <= h { + m := (l + h) >> 1 + mk = x.d[m].k + switch cmp := t.cmp(k, mk); { + case cmp > 0: + l = m + 1 + case cmp == 0: + return m, true + default: + h = m - 1 + } + } + } + return l, false +} + +// First returns the first item of the tree in the key collating order, or +// (zero-value, zero-value) if the tree is empty. +func (t *Tree) First() (k int64, v struct{}) { + if q := t.first; q != nil { + q := &q.d[0] + k, v = q.k, q.v + } + return +} + +// Get returns the value associated with k and true if it exists. Otherwise Get +// returns (zero-value, false). +func (t *Tree) Get(k int64) (v struct{}, ok bool) { + q := t.r + if q == nil { + return + } + + for { + var i int + if i, ok = t.find(q, k); ok { + switch x := q.(type) { + case *x: + q = x.x[i+1].ch + continue + case *d: + return x.d[i].v, true + } + } + switch x := q.(type) { + case *x: + q = x.x[i].ch + default: + return + } + } +} + +func (t *Tree) insert(q *d, i int, k int64, v struct{}) *d { + t.ver++ + c := q.c + if i < c { + copy(q.d[i+1:], q.d[i:c]) + } + c++ + q.c = c + q.d[i].k, q.d[i].v = k, v + t.c++ + return q +} + +// Last returns the last item of the tree in the key collating order, or +// (zero-value, zero-value) if the tree is empty. +func (t *Tree) Last() (k int64, v struct{}) { + if q := t.last; q != nil { + q := &q.d[q.c-1] + k, v = q.k, q.v + } + return +} + +// Len returns the number of items in the tree. +func (t *Tree) Len() int { + return t.c +} + +func (t *Tree) overflow(p *x, q *d, pi, i int, k int64, v struct{}) { + t.ver++ + l, r := p.siblings(pi) + + if l != nil && l.c < 2*kd { + l.mvL(q, 1) + t.insert(q, i-1, k, v) + p.x[pi-1].k = q.d[0].k + return + } + + if r != nil && r.c < 2*kd { + if i < 2*kd { + q.mvR(r, 1) + t.insert(q, i, k, v) + p.x[pi].k = r.d[0].k + } else { + t.insert(r, 0, k, v) + p.x[pi].k = k + } + return + } + + t.split(p, q, pi, i, k, v) +} + +// Seek returns an Enumerator positioned on a an item such that k >= item's +// key. ok reports if k == item.key The Enumerator's position is possibly +// after the last item in the tree. +func (t *Tree) Seek(k int64) (e *Enumerator, ok bool) { + q := t.r + if q == nil { + e = btEPool.get(nil, false, 0, k, nil, t, t.ver) + return + } + + for { + var i int + if i, ok = t.find(q, k); ok { + switch x := q.(type) { + case *x: + q = x.x[i+1].ch + continue + case *d: + return btEPool.get(nil, ok, i, k, x, t, t.ver), true + } + } + + switch x := q.(type) { + case *x: + q = x.x[i].ch + case *d: + return btEPool.get(nil, ok, i, k, x, t, t.ver), false + } + } +} + +// SeekFirst returns an enumerator positioned on the first KV pair in the tree, +// if any. For an empty tree, err == io.EOF is returned and e will be nil. +func (t *Tree) SeekFirst() (e *Enumerator, err error) { + q := t.first + if q == nil { + return nil, io.EOF + } + + return btEPool.get(nil, true, 0, q.d[0].k, q, t, t.ver), nil +} + +// SeekLast returns an enumerator positioned on the last KV pair in the tree, +// if any. For an empty tree, err == io.EOF is returned and e will be nil. +func (t *Tree) SeekLast() (e *Enumerator, err error) { + q := t.last + if q == nil { + return nil, io.EOF + } + + return btEPool.get(nil, true, q.c-1, q.d[q.c-1].k, q, t, t.ver), nil +} + +// Set sets the value associated with k. +func (t *Tree) Set(k int64, v struct{}) { + //dbg("--- PRE Set(%v, %v)\n%s", k, v, t.dump()) + //defer func() { + // dbg("--- POST\n%s\n====\n", t.dump()) + //}() + + pi := -1 + var p *x + q := t.r + if q == nil { + z := t.insert(btDPool.Get().(*d), 0, k, v) + t.r, t.first, t.last = z, z, z + return + } + + for { + i, ok := t.find(q, k) + if ok { + switch x := q.(type) { + case *x: + if x.c > 2*kx { + x, i = t.splitX(p, x, pi, i) + } + pi = i + 1 + p = x + q = x.x[i+1].ch + continue + case *d: + x.d[i].v = v + } + return + } + + switch x := q.(type) { + case *x: + if x.c > 2*kx { + x, i = t.splitX(p, x, pi, i) + } + pi = i + p = x + q = x.x[i].ch + case *d: + switch { + case x.c < 2*kd: + t.insert(x, i, k, v) + default: + t.overflow(p, x, pi, i, k, v) + } + return + } + } +} + +// Put combines Get and Set in a more efficient way where the tree is walked +// only once. The upd(ater) receives (old-value, true) if a KV pair for k +// exists or (zero-value, false) otherwise. It can then return a (new-value, +// true) to create or overwrite the existing value in the KV pair, or +// (whatever, false) if it decides not to create or not to update the value of +// the KV pair. +// +// tree.Set(k, v) call conceptually equals calling +// +// tree.Put(k, func(int64, bool){ return v, true }) +// +// modulo the differing return values. +func (t *Tree) Put(k int64, upd func(oldV struct{}, exists bool) (newV struct{}, write bool)) (oldV struct{}, written bool) { + pi := -1 + var p *x + q := t.r + var newV struct{} + if q == nil { + // new KV pair in empty tree + newV, written = upd(newV, false) + if !written { + return + } + + z := t.insert(btDPool.Get().(*d), 0, k, newV) + t.r, t.first, t.last = z, z, z + return + } + + for { + i, ok := t.find(q, k) + if ok { + switch x := q.(type) { + case *x: + if x.c > 2*kx { + x, i = t.splitX(p, x, pi, i) + } + pi = i + 1 + p = x + q = x.x[i+1].ch + continue + case *d: + oldV = x.d[i].v + newV, written = upd(oldV, true) + if !written { + return + } + + x.d[i].v = newV + } + return + } + + switch x := q.(type) { + case *x: + if x.c > 2*kx { + x, i = t.splitX(p, x, pi, i) + } + pi = i + p = x + q = x.x[i].ch + case *d: // new KV pair + newV, written = upd(newV, false) + if !written { + return + } + + switch { + case x.c < 2*kd: + t.insert(x, i, k, newV) + default: + t.overflow(p, x, pi, i, k, newV) + } + return + } + } +} + +func (t *Tree) split(p *x, q *d, pi, i int, k int64, v struct{}) { + t.ver++ + r := btDPool.Get().(*d) + if q.n != nil { + r.n = q.n + r.n.p = r + } else { + t.last = r + } + q.n = r + r.p = q + + copy(r.d[:], q.d[kd:2*kd]) + for i := range q.d[kd:] { + q.d[kd+i] = zde + } + q.c = kd + r.c = kd + var done bool + if i > kd { + done = true + t.insert(r, i-kd, k, v) + } + if pi >= 0 { + p.insert(pi, r.d[0].k, r) + } else { + t.r = newX(q).insert(0, r.d[0].k, r) + } + if done { + return + } + + t.insert(q, i, k, v) +} + +func (t *Tree) splitX(p *x, q *x, pi int, i int) (*x, int) { + t.ver++ + r := btXPool.Get().(*x) + copy(r.x[:], q.x[kx+1:]) + q.c = kx + r.c = kx + if pi >= 0 { + p.insert(pi, q.x[kx].k, r) + q.x[kx].k = zk + for i := range q.x[kx+1:] { + q.x[kx+i+1] = zxe + } + + switch { + case i < kx: + return q, i + case i == kx: + return p, pi + default: // i > kx + return r, i - kx - 1 + } + } + + nr := newX(q).insert(0, q.x[kx].k, r) + t.r = nr + q.x[kx].k = zk + for i := range q.x[kx+1:] { + q.x[kx+i+1] = zxe + } + + switch { + case i < kx: + return q, i + case i == kx: + return nr, 0 + default: // i > kx + return r, i - kx - 1 + } +} + +func (t *Tree) underflow(p *x, q *d, pi int) { + t.ver++ + l, r := p.siblings(pi) + + if l != nil && l.c+q.c >= 2*kd { + l.mvR(q, 1) + p.x[pi-1].k = q.d[0].k + } else if r != nil && q.c+r.c >= 2*kd { + q.mvL(r, 1) + p.x[pi].k = r.d[0].k + r.d[r.c] = zde // GC + } else if l != nil { + t.cat(p, l, q, pi-1) + } else { + t.cat(p, q, r, pi) + } +} + +func (t *Tree) underflowX(p *x, q *x, pi int, i int) (*x, int) { + t.ver++ + var l, r *x + + if pi >= 0 { + if pi > 0 { + l = p.x[pi-1].ch.(*x) + } + if pi < p.c { + r = p.x[pi+1].ch.(*x) + } + } + + if l != nil && l.c > kx { + q.x[q.c+1].ch = q.x[q.c].ch + copy(q.x[1:], q.x[:q.c]) + q.x[0].ch = l.x[l.c].ch + q.x[0].k = p.x[pi-1].k + q.c++ + i++ + l.c-- + p.x[pi-1].k = l.x[l.c].k + return q, i + } + + if r != nil && r.c > kx { + q.x[q.c].k = p.x[pi].k + q.c++ + q.x[q.c].ch = r.x[0].ch + p.x[pi].k = r.x[0].k + copy(r.x[:], r.x[1:r.c]) + r.c-- + rc := r.c + r.x[rc].ch = r.x[rc+1].ch + r.x[rc].k = zk + r.x[rc+1].ch = nil + return q, i + } + + if l != nil { + i += l.c + 1 + t.catX(p, l, q, pi-1) + q = l + return q, i + } + + t.catX(p, q, r, pi) + return q, i +} + +// ----------------------------------------------------------------- Enumerator + +// Close recycles e to a pool for possible later reuse. No references to e +// should exist or such references must not be used afterwards. +func (e *Enumerator) Close() { + *e = ze + btEPool.Put(e) +} + +// Next returns the currently enumerated item, if it exists and moves to the +// next item in the key collation order. If there is no item to return, err == +// io.EOF is returned. +func (e *Enumerator) Next() (k int64, v struct{}, err error) { + if err = e.err; err != nil { + return + } + + if e.ver != e.t.ver { + f, hit := e.t.Seek(e.k) + if !e.hit && hit { + if err = f.next(); err != nil { + return + } + } + + *e = *f + f.Close() + } + if e.q == nil { + e.err, err = io.EOF, io.EOF + return + } + + if e.i >= e.q.c { + if err = e.next(); err != nil { + return + } + } + + i := e.q.d[e.i] + k, v = i.k, i.v + e.k, e.hit = k, false + e.next() + return +} + +func (e *Enumerator) next() error { + if e.q == nil { + e.err = io.EOF + return io.EOF + } + + switch { + case e.i < e.q.c-1: + e.i++ + default: + if e.q, e.i = e.q.n, 0; e.q == nil { + e.err = io.EOF + } + } + return e.err +} + +// Prev returns the currently enumerated item, if it exists and moves to the +// previous item in the key collation order. If there is no item to return, err +// == io.EOF is returned. +func (e *Enumerator) Prev() (k int64, v struct{}, err error) { + if err = e.err; err != nil { + return + } + + if e.ver != e.t.ver { + f, hit := e.t.Seek(e.k) + if !e.hit && hit { + if err = f.prev(); err != nil { + return + } + } + + *e = *f + f.Close() + } + if e.q == nil { + e.err, err = io.EOF, io.EOF + return + } + + if e.i >= e.q.c { + if err = e.next(); err != nil { + return + } + } + + i := e.q.d[e.i] + k, v = i.k, i.v + e.k, e.hit = k, false + e.prev() + return +} + +func (e *Enumerator) prev() error { + if e.q == nil { + e.err = io.EOF + return io.EOF + } + + switch { + case e.i > 0: + e.i-- + default: + if e.q = e.q.p; e.q == nil { + e.err = io.EOF + break + } + + e.i = e.q.c - 1 + } + return e.err +} diff --git a/graph/memstore/b/keys_test.go b/graph/memstore/b/keys_test.go new file mode 100644 index 0000000..0425531 --- /dev/null +++ b/graph/memstore/b/keys_test.go @@ -0,0 +1,396 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package b + +import ( + "math" + "runtime/debug" + "testing" + + "github.com/cznic/mathutil" +) + +func rng() *mathutil.FC32 { + x, err := mathutil.NewFC32(math.MinInt32/4, math.MaxInt32/4, false) + if err != nil { + panic(err) + } + + return x +} + +func cmp(a, b int64) int { + return int(a - b) +} + +func BenchmarkSetSeq1e3(b *testing.B) { + benchmarkSetSeq(b, 1e3) +} + +func BenchmarkSetSeq1e4(b *testing.B) { + benchmarkSetSeq(b, 1e4) +} + +func BenchmarkSetSeq1e5(b *testing.B) { + benchmarkSetSeq(b, 1e5) +} + +func BenchmarkSetSeq1e6(b *testing.B) { + benchmarkSetSeq(b, 1e6) +} + +func benchmarkSetSeq(b *testing.B, n int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + r := TreeNew(cmp) + debug.FreeOSMemory() + b.StartTimer() + for j := int64(0); j < int64(n); j++ { + r.Set(j, struct{}{}) + } + b.StopTimer() + r.Close() + } + b.StopTimer() +} + +func BenchmarkGetSeq1e3(b *testing.B) { + benchmarkGetSeq(b, 1e3) +} + +func BenchmarkGetSeq1e4(b *testing.B) { + benchmarkGetSeq(b, 1e4) +} + +func BenchmarkGetSeq1e5(b *testing.B) { + benchmarkGetSeq(b, 1e5) +} + +func BenchmarkGetSeq1e6(b *testing.B) { + benchmarkGetSeq(b, 1e6) +} + +func benchmarkGetSeq(b *testing.B, n int) { + r := TreeNew(cmp) + for i := int64(0); i < int64(n); i++ { + r.Set(i, struct{}{}) + } + debug.FreeOSMemory() + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := int64(0); j < int64(n); j++ { + r.Get(j) + } + } + b.StopTimer() + r.Close() +} + +func BenchmarkSetRnd1e3(b *testing.B) { + benchmarkSetRnd(b, 1e3) +} + +func BenchmarkSetRnd1e4(b *testing.B) { + benchmarkSetRnd(b, 1e4) +} + +func BenchmarkSetRnd1e5(b *testing.B) { + benchmarkSetRnd(b, 1e5) +} + +func BenchmarkSetRnd1e6(b *testing.B) { + benchmarkSetRnd(b, 1e6) +} + +func benchmarkSetRnd(b *testing.B, n int) { + rng := rng() + a := make([]int, n) + for i := range a { + a[i] = rng.Next() + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + r := TreeNew(cmp) + debug.FreeOSMemory() + b.StartTimer() + for _, v := range a { + r.Set(int64(v), struct{}{}) + } + b.StopTimer() + r.Close() + } + b.StopTimer() +} + +func BenchmarkGetRnd1e3(b *testing.B) { + benchmarkGetRnd(b, 1e3) +} + +func BenchmarkGetRnd1e4(b *testing.B) { + benchmarkGetRnd(b, 1e4) +} + +func BenchmarkGetRnd1e5(b *testing.B) { + benchmarkGetRnd(b, 1e5) +} + +func BenchmarkGetRnd1e6(b *testing.B) { + benchmarkGetRnd(b, 1e6) +} + +func benchmarkGetRnd(b *testing.B, n int) { + r := TreeNew(cmp) + rng := rng() + a := make([]int64, n) + for i := range a { + a[i] = int64(rng.Next()) + } + for _, v := range a { + r.Set(v, struct{}{}) + } + debug.FreeOSMemory() + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, v := range a { + r.Get(v) + } + } + b.StopTimer() + r.Close() +} + +func BenchmarkDelSeq1e3(b *testing.B) { + benchmarkDelSeq(b, 1e3) +} + +func BenchmarkDelSeq1e4(b *testing.B) { + benchmarkDelSeq(b, 1e4) +} + +func BenchmarkDelSeq1e5(b *testing.B) { + benchmarkDelSeq(b, 1e5) +} + +func BenchmarkDelSeq1e6(b *testing.B) { + benchmarkDelSeq(b, 1e6) +} + +func benchmarkDelSeq(b *testing.B, n int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + r := TreeNew(cmp) + for j := int64(0); j < int64(n); j++ { + r.Set(j, struct{}{}) + } + debug.FreeOSMemory() + b.StartTimer() + for j := int64(0); j < int64(n); j++ { + r.Delete(j) + } + } + b.StopTimer() +} + +func BenchmarkDelRnd1e3(b *testing.B) { + benchmarkDelRnd(b, 1e3) +} + +func BenchmarkDelRnd1e4(b *testing.B) { + benchmarkDelRnd(b, 1e4) +} + +func BenchmarkDelRnd1e5(b *testing.B) { + benchmarkDelRnd(b, 1e5) +} + +func BenchmarkDelRnd1e6(b *testing.B) { + benchmarkDelRnd(b, 1e6) +} + +func benchmarkDelRnd(b *testing.B, n int) { + rng := rng() + a := make([]int64, n) + for i := range a { + a[i] = int64(rng.Next()) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + r := TreeNew(cmp) + for _, v := range a { + r.Set(v, struct{}{}) + } + debug.FreeOSMemory() + b.StartTimer() + for _, v := range a { + r.Delete(v) + } + b.StopTimer() + r.Close() + } + b.StopTimer() +} + +func BenchmarkSeekSeq1e3(b *testing.B) { + benchmarkSeekSeq(b, 1e3) +} + +func BenchmarkSeekSeq1e4(b *testing.B) { + benchmarkSeekSeq(b, 1e4) +} + +func BenchmarkSeekSeq1e5(b *testing.B) { + benchmarkSeekSeq(b, 1e5) +} + +func BenchmarkSeekSeq1e6(b *testing.B) { + benchmarkSeekSeq(b, 1e6) +} + +func benchmarkSeekSeq(b *testing.B, n int) { + for i := 0; i < b.N; i++ { + b.StopTimer() + t := TreeNew(cmp) + for j := int64(0); j < int64(n); j++ { + t.Set(j, struct{}{}) + } + debug.FreeOSMemory() + b.StartTimer() + for j := int64(0); j < int64(n); j++ { + e, _ := t.Seek(j) + e.Close() + } + b.StopTimer() + t.Close() + } + b.StopTimer() +} + +func BenchmarkSeekRnd1e3(b *testing.B) { + benchmarkSeekRnd(b, 1e3) +} + +func BenchmarkSeekRnd1e4(b *testing.B) { + benchmarkSeekRnd(b, 1e4) +} + +func BenchmarkSeekRnd1e5(b *testing.B) { + benchmarkSeekRnd(b, 1e5) +} + +func BenchmarkSeekRnd1e6(b *testing.B) { + benchmarkSeekRnd(b, 1e6) +} + +func benchmarkSeekRnd(b *testing.B, n int) { + r := TreeNew(cmp) + rng := rng() + a := make([]int64, n) + for i := range a { + a[i] = int64(rng.Next()) + } + for _, v := range a { + r.Set(v, struct{}{}) + } + debug.FreeOSMemory() + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, v := range a { + e, _ := r.Seek(v) + e.Close() + } + } + b.StopTimer() + r.Close() +} + +func BenchmarkNext1e3(b *testing.B) { + benchmarkNext(b, 1e3) +} + +func BenchmarkNext1e4(b *testing.B) { + benchmarkNext(b, 1e4) +} + +func BenchmarkNext1e5(b *testing.B) { + benchmarkNext(b, 1e5) +} + +func BenchmarkNext1e6(b *testing.B) { + benchmarkNext(b, 1e6) +} + +func benchmarkNext(b *testing.B, n int) { + t := TreeNew(cmp) + for i := int64(0); i < int64(n); i++ { + t.Set(i, struct{}{}) + } + debug.FreeOSMemory() + b.ResetTimer() + for i := 0; i < b.N; i++ { + en, err := t.SeekFirst() + if err != nil { + b.Fatal(err) + } + + m := 0 + for { + if _, _, err = en.Next(); err != nil { + break + } + m++ + } + if m != n { + b.Fatal(m) + } + } + b.StopTimer() + t.Close() +} + +func BenchmarkPrev1e3(b *testing.B) { + benchmarkPrev(b, 1e3) +} + +func BenchmarkPrev1e4(b *testing.B) { + benchmarkPrev(b, 1e4) +} + +func BenchmarkPrev1e5(b *testing.B) { + benchmarkPrev(b, 1e5) +} + +func BenchmarkPrev1e6(b *testing.B) { + benchmarkPrev(b, 1e6) +} + +func benchmarkPrev(b *testing.B, n int) { + t := TreeNew(cmp) + for i := int64(0); i < int64(n); i++ { + t.Set(i, struct{}{}) + } + debug.FreeOSMemory() + b.ResetTimer() + for i := 0; i < b.N; i++ { + en, err := t.SeekLast() + if err != nil { + b.Fatal(err) + } + + m := 0 + for { + if _, _, err = en.Prev(); err != nil { + break + } + m++ + } + if m != n { + b.Fatal(m) + } + } +} diff --git a/graph/memstore/iterator.go b/graph/memstore/iterator.go index 24dda19..00e6bdc 100644 --- a/graph/memstore/iterator.go +++ b/graph/memstore/iterator.go @@ -19,47 +19,36 @@ import ( "math" "strings" - "github.com/petar/GoLLRB/llrb" - "github.com/google/cayley/graph" "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/graph/memstore/b" ) type Iterator struct { - uid uint64 - tags graph.Tagger - tree *llrb.LLRB - data string - isRunning bool - iterLast Int64 - result graph.Value + uid uint64 + ts *TripleStore + tags graph.Tagger + tree *b.Tree + iter *b.Enumerator + data string + result graph.Value } -type Int64 int64 - -func (i Int64) Less(than llrb.Item) bool { - return i < than.(Int64) +func cmp(a, b int64) int { + return int(a - b) } -func IterateOne(tree *llrb.LLRB, last Int64) Int64 { - var next Int64 - tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool { - if i.(Int64) == last { - return true - } else { - next = i.(Int64) - return false - } - }) - return next -} - -func NewLlrbIterator(tree *llrb.LLRB, data string) *Iterator { +func NewIterator(tree *b.Tree, data string, ts *TripleStore) *Iterator { + iter, err := tree.SeekFirst() + if err != nil { + iter = nil + } return &Iterator{ - uid: iterator.NextUID(), - tree: tree, - iterLast: Int64(-1), - data: data, + uid: iterator.NextUID(), + ts: ts, + tree: tree, + iter: iter, + data: data, } } @@ -68,7 +57,11 @@ func (it *Iterator) UID() uint64 { } func (it *Iterator) Reset() { - it.iterLast = Int64(-1) + var err error + it.iter, err = it.tree.SeekFirst() + if err != nil { + it.iter = nil + } } func (it *Iterator) Tagger() *graph.Tagger { @@ -86,20 +79,53 @@ func (it *Iterator) TagResults(dst map[string]graph.Value) { } func (it *Iterator) Clone() graph.Iterator { - m := NewLlrbIterator(it.tree, it.data) + var iter *b.Enumerator + if it.result != nil { + var ok bool + iter, ok = it.tree.Seek(it.result.(int64)) + if !ok { + panic("value unexpectedly missing") + } + } else { + var err error + iter, err = it.tree.SeekFirst() + if err != nil { + iter = nil + } + } + + m := &Iterator{ + uid: iterator.NextUID(), + ts: it.ts, + tree: it.tree, + iter: iter, + data: it.data, + } m.tags.CopyFrom(it) + return m } func (it *Iterator) Close() {} +func (it *Iterator) checkValid(index int64) bool { + return it.ts.log[index].DeletedBy == 0 +} + func (it *Iterator) Next() bool { graph.NextLogIn(it) - if it.tree.Max() == nil || it.result == int64(it.tree.Max().(Int64)) { + + if it.iter == nil { return graph.NextLogOut(it, nil, false) } - it.iterLast = IterateOne(it.tree, it.iterLast) - it.result = int64(it.iterLast) + result, _, err := it.iter.Next() + if err != nil { + return graph.NextLogOut(it, nil, false) + } + if !it.checkValid(result) { + return it.Next() + } + it.result = result return graph.NextLogOut(it, it.result, true) } @@ -126,7 +152,7 @@ func (it *Iterator) Size() (int64, bool) { func (it *Iterator) Contains(v graph.Value) bool { graph.ContainsLogIn(it, v) - if it.tree.Has(Int64(v.(int64))) { + if _, ok := it.tree.Get(v.(int64)); ok { it.result = v return graph.ContainsLogOut(it, v, true) } @@ -141,7 +167,7 @@ func (it *Iterator) DebugString(indent int) string { var memType graph.Type func init() { - memType = graph.RegisterIterator("llrb") + memType = graph.RegisterIterator("b+tree") } func Type() graph.Type { return memType } diff --git a/graph/memstore/triplestore.go b/graph/memstore/triplestore.go index 3641a60..da03810 100644 --- a/graph/memstore/triplestore.go +++ b/graph/memstore/triplestore.go @@ -18,11 +18,11 @@ import ( "fmt" "github.com/barakmich/glog" + "github.com/google/cayley/graph" "github.com/google/cayley/graph/iterator" + "github.com/google/cayley/graph/memstore/b" "github.com/google/cayley/quad" - - "github.com/petar/GoLLRB/llrb" ) func init() { @@ -31,129 +31,131 @@ func init() { }, nil) } -type TripleDirectionIndex struct { - subject map[int64]*llrb.LLRB - predicate map[int64]*llrb.LLRB - object map[int64]*llrb.LLRB - label map[int64]*llrb.LLRB +type QuadDirectionIndex struct { + index [4]map[int64]*b.Tree } -func NewTripleDirectionIndex() *TripleDirectionIndex { - var tdi TripleDirectionIndex - tdi.subject = make(map[int64]*llrb.LLRB) - tdi.predicate = make(map[int64]*llrb.LLRB) - tdi.object = make(map[int64]*llrb.LLRB) - tdi.label = make(map[int64]*llrb.LLRB) - return &tdi +func NewQuadDirectionIndex() QuadDirectionIndex { + return QuadDirectionIndex{[...]map[int64]*b.Tree{ + quad.Subject - 1: make(map[int64]*b.Tree), + quad.Predicate - 1: make(map[int64]*b.Tree), + quad.Object - 1: make(map[int64]*b.Tree), + quad.Label - 1: make(map[int64]*b.Tree), + }} } -func (tdi *TripleDirectionIndex) GetForDir(d quad.Direction) map[int64]*llrb.LLRB { - switch d { - case quad.Subject: - return tdi.subject - case quad.Object: - return tdi.object - case quad.Predicate: - return tdi.predicate - case quad.Label: - return tdi.label +func (qdi QuadDirectionIndex) Tree(d quad.Direction, id int64) *b.Tree { + if d < quad.Subject || d > quad.Label { + panic("illegal direction") } - panic("illegal direction") -} - -func (tdi *TripleDirectionIndex) GetOrCreate(d quad.Direction, id int64) *llrb.LLRB { - directionIndex := tdi.GetForDir(d) - if _, ok := directionIndex[id]; !ok { - directionIndex[id] = llrb.New() + tree, ok := qdi.index[d-1][id] + if !ok { + tree = b.TreeNew(cmp) + qdi.index[d-1][id] = tree } - return directionIndex[id] + return tree } -func (tdi *TripleDirectionIndex) Get(d quad.Direction, id int64) (*llrb.LLRB, bool) { - directionIndex := tdi.GetForDir(d) - tree, exists := directionIndex[id] - return tree, exists +func (qdi QuadDirectionIndex) Get(d quad.Direction, id int64) (*b.Tree, bool) { + if d < quad.Subject || d > quad.Label { + panic("illegal direction") + } + tree, ok := qdi.index[d-1][id] + return tree, ok +} + +type LogEntry struct { + graph.Delta + DeletedBy int64 } type TripleStore struct { - idCounter int64 - tripleIdCounter int64 - idMap map[string]int64 - revIdMap map[int64]string - triples []quad.Quad - size int64 - index TripleDirectionIndex - // vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree + idCounter int64 + quadIdCounter int64 + idMap map[string]int64 + revIdMap map[int64]string + log []LogEntry + size int64 + index QuadDirectionIndex + // vip_index map[string]map[int64]map[string]map[int64]*b.Tree } func newTripleStore() *TripleStore { - var ts TripleStore - ts.idMap = make(map[string]int64) - ts.revIdMap = make(map[int64]string) - ts.triples = make([]quad.Quad, 1, 200) + return &TripleStore{ + idMap: make(map[string]int64), + revIdMap: make(map[int64]string), - // Sentinel null triple so triple indices start at 1 - ts.triples[0] = quad.Quad{} - ts.size = 1 - ts.index = *NewTripleDirectionIndex() - ts.idCounter = 1 - ts.tripleIdCounter = 1 - return &ts -} + // Sentinel null entry so indices start at 1 + log: make([]LogEntry, 1, 200), -func (ts *TripleStore) AddTripleSet(triples []quad.Quad) { - for _, t := range triples { - ts.AddTriple(t) + index: NewQuadDirectionIndex(), + idCounter: 1, + quadIdCounter: 1, } } -func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) { - smallest := -1 - var smallest_tree *llrb.LLRB +func (ts *TripleStore) ApplyDeltas(deltas []graph.Delta) error { + for _, d := range deltas { + var err error + if d.Action == graph.Add { + err = ts.AddDelta(d) + } else { + err = ts.RemoveDelta(d) + } + if err != nil { + return err + } + } + return nil +} + +const maxInt = int(^uint(0) >> 1) + +func (ts *TripleStore) indexOf(t quad.Quad) (int64, bool) { + min := maxInt + var tree *b.Tree for d := quad.Subject; d <= quad.Label; d++ { sid := t.Get(d) if d == quad.Label && sid == "" { continue } id, ok := ts.idMap[sid] - // If we've never heard about a node, it most not exist + // If we've never heard about a node, it must not exist if !ok { - return false, 0 + return 0, false } - index, exists := ts.index.Get(d, id) - if !exists { + index, ok := ts.index.Get(d, id) + if !ok { // If it's never been indexed in this direction, it can't exist. - return false, 0 + return 0, false } - if smallest == -1 || index.Len() < smallest { - smallest = index.Len() - smallest_tree = index + if l := index.Len(); l < min { + min, tree = l, index } } - it := NewLlrbIterator(smallest_tree, "") + it := NewIterator(tree, "", ts) for it.Next() { val := it.Result() - if t == ts.triples[val.(int64)] { - return true, val.(int64) + if t == ts.log[val.(int64)].Quad { + return val.(int64), true } } - return false, 0 + return 0, false } -func (ts *TripleStore) AddTriple(t quad.Quad) { - if exists, _ := ts.tripleExists(t); exists { - return +func (ts *TripleStore) AddDelta(d graph.Delta) error { + if _, exists := ts.indexOf(d.Quad); exists { + return graph.ErrQuadExists } - var tripleID int64 - ts.triples = append(ts.triples, t) - tripleID = ts.tripleIdCounter + qid := ts.quadIdCounter + ts.log = append(ts.log, LogEntry{Delta: d}) ts.size++ - ts.tripleIdCounter++ + ts.quadIdCounter++ - for d := quad.Subject; d <= quad.Label; d++ { - sid := t.Get(d) - if d == quad.Label && sid == "" { + for dir := quad.Subject; dir <= quad.Label; dir++ { + sid := d.Quad.Get(dir) + if dir == quad.Label && sid == "" { continue } if _, ok := ts.idMap[sid]; !ok { @@ -163,87 +165,60 @@ func (ts *TripleStore) AddTriple(t quad.Quad) { } } - for d := quad.Subject; d <= quad.Label; d++ { - if d == quad.Label && t.Get(d) == "" { + for dir := quad.Subject; dir <= quad.Label; dir++ { + if dir == quad.Label && d.Quad.Get(dir) == "" { continue } - id := ts.idMap[t.Get(d)] - tree := ts.index.GetOrCreate(d, id) - tree.ReplaceOrInsert(Int64(tripleID)) + id := ts.idMap[d.Quad.Get(dir)] + tree := ts.index.Tree(dir, id) + tree.Set(qid, struct{}{}) } // TODO(barakmich): Add VIP indexing + return nil } -func (ts *TripleStore) RemoveTriple(t quad.Quad) { - var tripleID int64 - var exists bool - tripleID = 0 - if exists, tripleID = ts.tripleExists(t); !exists { - return +func (ts *TripleStore) RemoveDelta(d graph.Delta) error { + prevQuadID, exists := ts.indexOf(d.Quad) + if !exists { + return graph.ErrQuadNotExist } - ts.triples[tripleID] = quad.Quad{} + quadID := ts.quadIdCounter + ts.log = append(ts.log, LogEntry{Delta: d}) + ts.log[prevQuadID].DeletedBy = quadID ts.size-- - - for d := quad.Subject; d <= quad.Label; d++ { - if d == quad.Label && t.Get(d) == "" { - continue - } - id := ts.idMap[t.Get(d)] - tree := ts.index.GetOrCreate(d, id) - tree.Delete(Int64(tripleID)) - } - - for d := quad.Subject; d <= quad.Label; d++ { - if d == quad.Label && t.Get(d) == "" { - continue - } - id, ok := ts.idMap[t.Get(d)] - if !ok { - continue - } - stillExists := false - for d := quad.Subject; d <= quad.Label; d++ { - if d == quad.Label && t.Get(d) == "" { - continue - } - nodeTree := ts.index.GetOrCreate(d, id) - if nodeTree.Len() != 0 { - stillExists = true - break - } - } - if !stillExists { - delete(ts.idMap, t.Get(d)) - delete(ts.revIdMap, id) - } - } + ts.quadIdCounter++ + return nil } func (ts *TripleStore) Quad(index graph.Value) quad.Quad { - return ts.triples[index.(int64)] + return ts.log[index.(int64)].Quad } func (ts *TripleStore) TripleIterator(d quad.Direction, value graph.Value) graph.Iterator { index, ok := ts.index.Get(d, value.(int64)) data := fmt.Sprintf("dir:%s val:%d", d, value.(int64)) if ok { - return NewLlrbIterator(index, data) + return NewIterator(index, data, ts) } return &iterator.Null{} } +func (ts *TripleStore) Horizon() int64 { + return ts.log[len(ts.log)-1].ID +} + func (ts *TripleStore) Size() int64 { - return ts.size - 1 // Don't count the sentinel + return ts.size } func (ts *TripleStore) DebugPrint() { - for i, t := range ts.triples { + for i, l := range ts.log { if i == 0 { continue } - glog.V(2).Infof("%d: %s", i, t) + glog.V(2).Infof("%d: %#v", i, l) } } @@ -256,7 +231,7 @@ func (ts *TripleStore) NameOf(id graph.Value) string { } func (ts *TripleStore) TriplesAllIterator() graph.Iterator { - return iterator.NewInt64(0, ts.Size()) + return NewMemstoreQuadsAllIterator(ts) } func (ts *TripleStore) FixedIterator() graph.FixedIterator { @@ -269,6 +244,7 @@ func (ts *TripleStore) TripleDirection(val graph.Value, d quad.Direction) graph. } func (ts *TripleStore) NodesAllIterator() graph.Iterator { - return NewMemstoreAllIterator(ts) + return NewMemstoreNodesAllIterator(ts) } + func (ts *TripleStore) Close() {} diff --git a/graph/memstore/triplestore_test.go b/graph/memstore/triplestore_test.go index 4e81b2d..6f1959f 100644 --- a/graph/memstore/triplestore_test.go +++ b/graph/memstore/triplestore_test.go @@ -22,6 +22,7 @@ import ( "github.com/google/cayley/graph" "github.com/google/cayley/graph/iterator" "github.com/google/cayley/quad" + "github.com/google/cayley/writer" ) // This is a simple test graph. @@ -51,13 +52,14 @@ var simpleGraph = []quad.Quad{ {"G", "status", "cool", "status_graph"}, } -func makeTestStore(data []quad.Quad) (*TripleStore, []pair) { +func makeTestStore(data []quad.Quad) (*TripleStore, graph.QuadWriter, []pair) { seen := make(map[string]struct{}) ts := newTripleStore() var ( val int64 ind []pair ) + writer, _ := writer.NewSingleReplication(ts, nil) for _, t := range data { for _, qp := range []string{t.Subject, t.Predicate, t.Object, t.Label} { if _, ok := seen[qp]; !ok && qp != "" { @@ -66,9 +68,10 @@ func makeTestStore(data []quad.Quad) (*TripleStore, []pair) { seen[qp] = struct{}{} } } - ts.AddTriple(t) + + writer.AddQuad(t) } - return ts, ind + return ts, writer, ind } type pair struct { @@ -77,7 +80,7 @@ type pair struct { } func TestMemstore(t *testing.T) { - ts, index := makeTestStore(simpleGraph) + ts, _, index := makeTestStore(simpleGraph) if size := ts.Size(); size != int64(len(simpleGraph)) { t.Errorf("Triple store has unexpected size, got:%d expected %d", size, len(simpleGraph)) } @@ -95,7 +98,7 @@ func TestMemstore(t *testing.T) { } func TestIteratorsAndNextResultOrderA(t *testing.T) { - ts, _ := makeTestStore(simpleGraph) + ts, _, _ := makeTestStore(simpleGraph) fixed := ts.FixedIterator() fixed.Add(ts.ValueOf("C")) @@ -144,7 +147,7 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { } func TestLinksToOptimization(t *testing.T) { - ts, _ := makeTestStore(simpleGraph) + ts, _, _ := makeTestStore(simpleGraph) fixed := ts.FixedIterator() fixed.Add(ts.ValueOf("cool")) @@ -172,9 +175,9 @@ func TestLinksToOptimization(t *testing.T) { } func TestRemoveTriple(t *testing.T) { - ts, _ := makeTestStore(simpleGraph) + ts, w, _ := makeTestStore(simpleGraph) - ts.RemoveTriple(quad.Quad{"E", "follows", "F", ""}) + w.RemoveQuad(quad.Quad{"E", "follows", "F", ""}) fixed := ts.FixedIterator() fixed.Add(ts.ValueOf("E")) diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index 9d61711..3356974 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -45,17 +45,7 @@ type Iterator struct { func NewIterator(qs *TripleStore, collection string, d quad.Direction, val graph.Value) *Iterator { name := qs.NameOf(val) - var constraint bson.M - switch d { - case quad.Subject: - constraint = bson.M{"Subject": name} - case quad.Predicate: - constraint = bson.M{"Predicate": name} - case quad.Object: - constraint = bson.M{"Object": name} - case quad.Label: - constraint = bson.M{"Label": name} - } + constraint := bson.M{d.String(): name} size, err := qs.db.C(collection).Find(constraint).Count() if err != nil { @@ -140,10 +130,9 @@ func (it *Iterator) Clone() graph.Iterator { func (it *Iterator) Next() bool { var result struct { - Id string "_id" - //Sub string "Sub" - //Pred string "Pred" - //Obj string "Obj" + Id string "_id" + Added []int64 "Added" + Deleted []int64 "Deleted" } found := it.iter.Next(&result) if !found { @@ -153,6 +142,9 @@ func (it *Iterator) Next() bool { } return false } + if it.collection == "quads" && len(result.Added) <= len(result.Deleted) { + return it.Next() + } it.result = result.Id return true } diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 044e174..03c4c93 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -18,7 +18,6 @@ import ( "crypto/sha1" "encoding/hex" "hash" - "io" "sync" "gopkg.in/mgo.v2" @@ -34,9 +33,6 @@ func init() { graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph) } -// Guarantee we satisfy graph.Bulkloader. -var _ graph.BulkLoader = (*TripleStore)(nil) - const DefaultDBName = "cayley" var ( @@ -64,19 +60,27 @@ func createNewMongoGraph(addr string, options graph.Options) error { } db := conn.DB(dbName) indexOpts := mgo.Index{ - Key: []string{"Sub"}, + Key: []string{"subject"}, Unique: false, DropDups: false, Background: true, Sparse: true, } - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Pred"} - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Obj"} - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Label"} - db.C("triples").EnsureIndex(indexOpts) + db.C("quads").EnsureIndex(indexOpts) + indexOpts.Key = []string{"predicate"} + db.C("quads").EnsureIndex(indexOpts) + indexOpts.Key = []string{"object"} + db.C("quads").EnsureIndex(indexOpts) + indexOpts.Key = []string{"label"} + db.C("quads").EnsureIndex(indexOpts) + logOpts := mgo.Index{ + Key: []string{"LogID"}, + Unique: true, + DropDups: false, + Background: true, + Sparse: true, + } + db.C("log").EnsureIndex(logOpts) return nil } @@ -97,7 +101,7 @@ func newTripleStore(addr string, options graph.Options) (graph.TripleStore, erro return &qs, nil } -func (qs *TripleStore) getIdForTriple(t quad.Quad) string { +func (qs *TripleStore) getIdForQuad(t quad.Quad) string { id := qs.convertStringToByteHash(t.Subject) id += qs.convertStringToByteHash(t.Predicate) id += qs.convertStringToByteHash(t.Object) @@ -122,125 +126,157 @@ type MongoNode struct { Size int "Size" } -func (qs *TripleStore) updateNodeBy(node_name string, inc int) { - var size MongoNode - node := qs.ValueOf(node_name) - err := qs.db.C("nodes").FindId(node).One(&size) - if err != nil { - if err.Error() == "not found" { - // Not found. Okay. - size.Id = node.(string) - size.Name = node_name - size.Size = inc - } else { - glog.Errorf("Error: %v", err) - return - } - } else { - size.Id = node.(string) - size.Name = node_name - size.Size += inc - } - - // Removing something... - if inc < 0 { - if size.Size <= 0 { - err := qs.db.C("nodes").RemoveId(node) - if err != nil { - glog.Errorf("Error: %v while removing node %s", err, node_name) - return - } - } - } - - _, err2 := qs.db.C("nodes").UpsertId(node, size) - if err2 != nil { - glog.Errorf("Error: %v", err) - } +type MongoLogEntry struct { + LogID int64 "LogID" + Action string "Action" + Key string "Key" + Timestamp int64 } -func (qs *TripleStore) writeTriple(t quad.Quad) bool { - tripledoc := bson.M{ - "_id": qs.getIdForTriple(t), - "Subject": t.Subject, - "Predicate": t.Predicate, - "Object": t.Object, - "Label": t.Label, +func (qs *TripleStore) updateNodeBy(node_name string, inc int) error { + node := qs.ValueOf(node_name) + doc := bson.M{ + "_id": node.(string), + "Name": node_name, } - err := qs.db.C("triples").Insert(tripledoc) + upsert := bson.M{ + "$setOnInsert": doc, + "$inc": bson.M{ + "Size": inc, + }, + } + + _, err := qs.db.C("nodes").UpsertId(node, upsert) + if err != nil { + glog.Errorf("Error updating node: %v", err) + } + return err +} + +func (qs *TripleStore) updateQuad(q quad.Quad, id int64, proc graph.Procedure) error { + var setname string + if proc == graph.Add { + setname = "Added" + } else if proc == graph.Delete { + setname = "Deleted" + } + upsert := bson.M{ + "$setOnInsert": q, + "$push": bson.M{ + setname: id, + }, + } + _, err := qs.db.C("quads").UpsertId(qs.getIdForQuad(q), upsert) if err != nil { - // Among the reasons I hate MongoDB. "Errors don't happen! Right guys?" - if err.(*mgo.LastError).Code == 11000 { - return false - } glog.Errorf("Error: %v", err) + } + return err +} + +func (qs *TripleStore) checkValid(key string) bool { + var indexEntry struct { + Added []int64 "Added" + Deleted []int64 "Deleted" + } + err := qs.db.C("quads").FindId(key).One(&indexEntry) + if err == mgo.ErrNotFound { + return false + } + if err != nil { + glog.Errorln("Other error checking valid quad: %s %v.", key, err) + return false + } + if len(indexEntry.Added) <= len(indexEntry.Deleted) { return false } return true } -func (qs *TripleStore) AddTriple(t quad.Quad) { - _ = qs.writeTriple(t) - qs.updateNodeBy(t.Subject, 1) - qs.updateNodeBy(t.Predicate, 1) - qs.updateNodeBy(t.Object, 1) - if t.Label != "" { - qs.updateNodeBy(t.Label, 1) +func (qs *TripleStore) updateLog(d graph.Delta) error { + var action string + if d.Action == graph.Add { + action = "Add" + } else { + action = "Delete" } + entry := MongoLogEntry{ + LogID: d.ID, + Action: action, + Key: qs.getIdForQuad(d.Quad), + Timestamp: d.Timestamp.UnixNano(), + } + err := qs.db.C("log").Insert(entry) + if err != nil { + glog.Errorf("Error updating log: %v", err) + } + return err } -func (qs *TripleStore) AddTripleSet(in []quad.Quad) { +func (qs *TripleStore) ApplyDeltas(in []graph.Delta) error { qs.session.SetSafe(nil) ids := make(map[string]int) - for _, t := range in { - wrote := qs.writeTriple(t) - if wrote { - ids[t.Subject]++ - ids[t.Object]++ - ids[t.Predicate]++ - if t.Label != "" { - ids[t.Label]++ + // Pre-check the existence condition. + for _, d := range in { + key := qs.getIdForQuad(d.Quad) + switch d.Action { + case graph.Add: + if qs.checkValid(key) { + return graph.ErrQuadExists + } + case graph.Delete: + if !qs.checkValid(key) { + return graph.ErrQuadNotExist } } } + if glog.V(2) { + glog.Infoln("Existence verified. Proceeding.") + } + for _, d := range in { + err := qs.updateLog(d) + if err != nil { + return err + } + } + for _, d := range in { + err := qs.updateQuad(d.Quad, d.ID, d.Action) + if err != nil { + return err + } + var countdelta int + if d.Action == graph.Add { + countdelta = 1 + } else { + countdelta = -1 + } + ids[d.Quad.Subject] += countdelta + ids[d.Quad.Object] += countdelta + ids[d.Quad.Predicate] += countdelta + if d.Quad.Label != "" { + ids[d.Quad.Label] += countdelta + } + } for k, v := range ids { - qs.updateNodeBy(k, v) + err := qs.updateNodeBy(k, v) + if err != nil { + return err + } } qs.session.SetSafe(&mgo.Safe{}) -} - -func (qs *TripleStore) RemoveTriple(t quad.Quad) { - err := qs.db.C("triples").RemoveId(qs.getIdForTriple(t)) - if err == mgo.ErrNotFound { - return - } else if err != nil { - glog.Errorf("Error: %v while removing triple %v", err, t) - return - } - qs.updateNodeBy(t.Subject, -1) - qs.updateNodeBy(t.Predicate, -1) - qs.updateNodeBy(t.Object, -1) - if t.Label != "" { - qs.updateNodeBy(t.Label, -1) - } + return nil } func (qs *TripleStore) Quad(val graph.Value) quad.Quad { - var bsonDoc bson.M - err := qs.db.C("triples").FindId(val.(string)).One(&bsonDoc) + var q quad.Quad + err := qs.db.C("quads").FindId(val.(string)).One(&q) if err != nil { - glog.Errorf("Error: Couldn't retrieve triple %s %v", val, err) - } - return quad.Quad{ - bsonDoc["Subject"].(string), - bsonDoc["Predicate"].(string), - bsonDoc["Object"].(string), - bsonDoc["Label"].(string), + glog.Errorf("Error: Couldn't retrieve quad %s %v", val, err) } + return q } func (qs *TripleStore) TripleIterator(d quad.Direction, val graph.Value) graph.Iterator { - return NewIterator(qs, "triples", d, val) + return NewIterator(qs, "quads", d, val) } func (qs *TripleStore) NodesAllIterator() graph.Iterator { @@ -248,7 +284,7 @@ func (qs *TripleStore) NodesAllIterator() graph.Iterator { } func (qs *TripleStore) TriplesAllIterator() graph.Iterator { - return NewAllIterator(qs, "triples") + return NewAllIterator(qs, "quads") } func (qs *TripleStore) ValueOf(s string) graph.Value { @@ -270,7 +306,8 @@ func (qs *TripleStore) NameOf(v graph.Value) string { } func (qs *TripleStore) Size() int64 { - count, err := qs.db.C("triples").Count() + // TODO(barakmich): Make size real; store it in the log, and retrieve it. + count, err := qs.db.C("quads").Count() if err != nil { glog.Errorf("Error: %v", err) return 0 @@ -278,6 +315,18 @@ func (qs *TripleStore) Size() int64 { return int64(count) } +func (qs *TripleStore) Horizon() int64 { + var log MongoLogEntry + err := qs.db.C("log").Find(nil).Sort("-LogID").One(&log) + if err != nil { + if err == mgo.ErrNotFound { + return 0 + } + glog.Errorf("Could not get Horizon from Mongo: %v", err) + } + return log.LogID +} + func compareStrings(a, b graph.Value) bool { return a.(string) == b.(string) } @@ -307,61 +356,4 @@ func (qs *TripleStore) TripleDirection(in graph.Value, d quad.Direction) graph.V return val } -func (qs *TripleStore) BulkLoad(dec quad.Unmarshaler) error { - if qs.Size() != 0 { - return graph.ErrCannotBulkLoad - } - - qs.session.SetSafe(nil) - for { - q, err := dec.Unmarshal() - if err != nil { - if err != io.EOF { - return err - } - break - } - qs.writeTriple(q) - } - - outputTo := bson.M{"replace": "nodes", "sharded": true} - glog.Infoln("Mapreducing") - job := mgo.MapReduce{ - Map: `function() { - var len = this["_id"].length - var s_key = this["_id"].slice(0, len / 4) - var p_key = this["_id"].slice(len / 4, 2 * len / 4) - var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) - var c_key = this["_id"].slice(3 * len / 4) - emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1}) - emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1}) - emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1}) - if (this.Label != "") { - emit(c_key, {"_id": c_key, "Name" : this.Label, "Size" : 1}) - } - } - `, - Reduce: ` - function(key, value_list) { - out = {"_id": key, "Name": value_list[0].Name} - count = 0 - for (var i = 0; i < value_list.length; i++) { - count = count + value_list[i].Size - - } - out["Size"] = count - return out - } - `, - Out: outputTo, - } - qs.db.C("triples").Find(nil).MapReduce(&job, nil) - glog.Infoln("Fixing") - qs.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) { - db.nodes.update({"_id": result._id}, result.value) - }) }`}, {"args", bson.D{}}}, nil) - - qs.session.SetSafe(&mgo.Safe{}) - - return nil -} +// TODO(barakmich): Rewrite bulk loader. For now, iterating around blocks is the way we'll go about it. diff --git a/graph/quadwriter.go b/graph/quadwriter.go new file mode 100644 index 0000000..38ae137 --- /dev/null +++ b/graph/quadwriter.go @@ -0,0 +1,101 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Defines the interface for consistent replication of a graph instance. +// +// Separate from the backend, this dictates how individual triples get +// identified and replicated consistently across (potentially) multiple +// instances. The simplest case is to keep an append-only log of triple +// changes. + +import ( + "errors" + "time" + + "github.com/google/cayley/quad" +) + +type Procedure byte + +// The different types of actions a transaction can do. +const ( + Add Procedure = iota + Delete +) + +type Delta struct { + ID int64 + Quad quad.Quad + Action Procedure + Timestamp time.Time +} + +type Handle struct { + QuadStore TripleStore + QuadWriter QuadWriter +} + +func (h *Handle) Close() { + h.QuadStore.Close() + h.QuadWriter.Close() +} + +var ( + ErrQuadExists = errors.New("Quad exists") + ErrQuadNotExist = errors.New("Quad doesn't exist") +) + +type QuadWriter interface { + // Add a quad to the store. + AddQuad(quad.Quad) error + + // Add a set of quads to the store, atomically if possible. + AddQuadSet([]quad.Quad) error + + // Removes a quad matching the given one from the database, + // if it exists. Does nothing otherwise. + RemoveQuad(quad.Quad) error + + // Cleans up replication and closes the writing aspect of the database. + Close() error +} + +type NewQuadWriterFunc func(TripleStore, Options) (QuadWriter, error) + +var writerRegistry = make(map[string]NewQuadWriterFunc) + +func RegisterWriter(name string, newFunc NewQuadWriterFunc) { + if _, found := writerRegistry[name]; found { + panic("already registered TripleWriter " + name) + } + writerRegistry[name] = newFunc +} + +func NewQuadWriter(name string, ts TripleStore, opts Options) (QuadWriter, error) { + newFunc, hasNew := writerRegistry[name] + if !hasNew { + return nil, errors.New("replication: name '" + name + "' is not registered") + } + return newFunc(ts, opts) +} + +func WriterMethods() []string { + t := make([]string, 0, len(writerRegistry)) + for n := range writerRegistry { + t = append(t, n) + } + return t +} diff --git a/graph/triplestore.go b/graph/triplestore.go index 9d45dd5..df85124 100644 --- a/graph/triplestore.go +++ b/graph/triplestore.go @@ -42,15 +42,9 @@ import ( type Value interface{} type TripleStore interface { - // Add a triple to the store. - AddTriple(quad.Quad) - - // Add a set of triples to the store, atomically if possible. - AddTripleSet([]quad.Quad) - - // Removes a triple matching the given one from the database, - // if it exists. Does nothing otherwise. - RemoveTriple(quad.Quad) + // The only way in is through building a transaction, which + // is done by a replication strategy. + ApplyDeltas([]Delta) error // Given an opaque token, returns the triple for that token from the store. Quad(Value) quad.Quad @@ -75,6 +69,9 @@ type TripleStore interface { // Returns the number of triples currently stored. Size() int64 + // The last replicated transaction ID that this triplestore has verified. + Horizon() int64 + // Creates a fixed iterator which can compare Values FixedIterator() FixedIterator diff --git a/http/http.go b/http/http.go index 70321dd..d5f9351 100644 --- a/http/http.go +++ b/http/http.go @@ -107,7 +107,7 @@ func (h *TemplateRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques type Api struct { config *config.Config - ts graph.TripleStore + handle *graph.Handle } func (api *Api) ApiV1(r *httprouter.Router) { @@ -119,7 +119,7 @@ func (api *Api) ApiV1(r *httprouter.Router) { r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete)) } -func SetupRoutes(ts graph.TripleStore, cfg *config.Config) { +func SetupRoutes(handle *graph.Handle, cfg *config.Config) { r := httprouter.New() assets := findAssetsPath() if glog.V(2) { @@ -129,7 +129,7 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) { templates.ParseGlob(fmt.Sprint(assets, "/templates/*.html")) root := &TemplateRequestHandler{templates: templates} docs := &DocRequestHandler{assets: assets} - api := &Api{config: cfg, ts: ts} + api := &Api{config: cfg, handle: handle} api.ApiV1(r) //m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true})) @@ -141,8 +141,8 @@ func SetupRoutes(ts graph.TripleStore, cfg *config.Config) { http.Handle("/", r) } -func Serve(ts graph.TripleStore, cfg *config.Config) { - SetupRoutes(ts, cfg) +func Serve(handle *graph.Handle, cfg *config.Config) { + SetupRoutes(handle, cfg) glog.Infof("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort) fmt.Printf("Cayley now listening on %s:%s\n", cfg.ListenHost, cfg.ListenPort) err := http.ListenAndServe(fmt.Sprintf("%s:%s", cfg.ListenHost, cfg.ListenPort), nil) diff --git a/http/query.go b/http/query.go index f4f5a34..9d6460a 100644 --- a/http/query.go +++ b/http/query.go @@ -71,9 +71,9 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http var ses query.HttpSession switch params.ByName("query_lang") { case "gremlin": - ses = gremlin.NewSession(api.ts, api.config.Timeout, false) + ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false) case "mql": - ses = mql.NewSession(api.ts) + ses = mql.NewSession(api.handle.QuadStore) default: return FormatJson400(w, "Need a query language.") } @@ -110,18 +110,15 @@ func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params http ses = nil return FormatJsonError(w, 500, "Incomplete data?") } - http.Error(w, "", http.StatusNotFound) - ses = nil - return http.StatusNotFound } func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { var ses query.HttpSession switch params.ByName("query_lang") { case "gremlin": - ses = gremlin.NewSession(api.ts, api.config.Timeout, false) + ses = gremlin.NewSession(api.handle.QuadStore, api.config.Timeout, false) case "mql": - ses = mql.NewSession(api.ts) + ses = mql.NewSession(api.handle.QuadStore) default: return FormatJson400(w, "Need a query language.") } @@ -146,6 +143,4 @@ func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params http default: return FormatJsonError(w, 500, "Incomplete data?") } - http.Error(w, "", http.StatusNotFound) - return http.StatusNotFound } diff --git a/http/write.go b/http/write.go index 54e5537..018b339 100644 --- a/http/write.go +++ b/http/write.go @@ -55,7 +55,7 @@ func (api *Api) ServeV1Write(w http.ResponseWriter, r *http.Request, _ httproute if terr != nil { return FormatJson400(w, terr) } - api.ts.AddTripleSet(tripleList) + api.handle.QuadWriter.AddQuadSet(tripleList) fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList)) return 200 } @@ -97,11 +97,11 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params block = append(block, t) n++ if len(block) == cap(block) { - api.ts.AddTripleSet(block) + api.handle.QuadWriter.AddQuadSet(block) block = block[:0] } } - api.ts.AddTripleSet(block) + api.handle.QuadWriter.AddQuadSet(block) fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n) @@ -122,7 +122,7 @@ func (api *Api) ServeV1Delete(w http.ResponseWriter, r *http.Request, params htt } count := 0 for _, triple := range tripleList { - api.ts.RemoveTriple(triple) + api.handle.QuadWriter.RemoveQuad(triple) count++ } fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count) diff --git a/query/gremlin/gremlin_test.go b/query/gremlin/gremlin_test.go index a638eb7..566305a 100644 --- a/query/gremlin/gremlin_test.go +++ b/query/gremlin/gremlin_test.go @@ -23,6 +23,7 @@ import ( "github.com/google/cayley/quad" _ "github.com/google/cayley/graph/memstore" + _ "github.com/google/cayley/writer" ) // This is a simple test graph. @@ -54,8 +55,9 @@ var simpleGraph = []quad.Quad{ func makeTestSession(data []quad.Quad) *Session { ts, _ := graph.NewTripleStore("memstore", "", nil) + w, _ := graph.NewQuadWriter("single", ts, nil) for _, t := range data { - ts.AddTriple(t) + w.AddQuad(t) } return NewSession(ts, -1, false) } diff --git a/query/mql/mql_test.go b/query/mql/mql_test.go index 619ae54..d7e8ef4 100644 --- a/query/mql/mql_test.go +++ b/query/mql/mql_test.go @@ -22,6 +22,7 @@ import ( "github.com/google/cayley/graph" _ "github.com/google/cayley/graph/memstore" "github.com/google/cayley/quad" + _ "github.com/google/cayley/writer" ) // This is a simple test graph. @@ -53,8 +54,9 @@ var simpleGraph = []quad.Quad{ func makeTestSession(data []quad.Quad) *Session { ts, _ := graph.NewTripleStore("memstore", "", nil) + w, _ := graph.NewQuadWriter("single", ts, nil) for _, t := range data { - ts.AddTriple(t) + w.AddQuad(t) } return NewSession(ts) } diff --git a/query/sexp/parser_test.go b/query/sexp/parser_test.go index d4d16af..2026a2f 100644 --- a/query/sexp/parser_test.go +++ b/query/sexp/parser_test.go @@ -21,6 +21,7 @@ import ( "github.com/google/cayley/quad" _ "github.com/google/cayley/graph/memstore" + _ "github.com/google/cayley/writer" ) func TestBadParse(t *testing.T) { @@ -55,13 +56,14 @@ var testQueries = []struct { func TestMemstoreBackedSexp(t *testing.T) { ts, _ := graph.NewTripleStore("memstore", "", nil) + w, _ := graph.NewQuadWriter("single", ts, nil) it := BuildIteratorTreeForQuery(ts, "()") if it.Type() != graph.Null { t.Errorf(`Incorrect type for empty query, got:%q expect: "null"`, it.Type()) } for _, test := range testQueries { if test.add.IsValid() { - ts.AddTriple(test.add) + w.AddQuad(test.add) } it := BuildIteratorTreeForQuery(ts, test.query) if it.Type() != test.typ { @@ -79,8 +81,9 @@ func TestMemstoreBackedSexp(t *testing.T) { func TestTreeConstraintParse(t *testing.T) { ts, _ := graph.NewTripleStore("memstore", "", nil) - ts.AddTriple(quad.Quad{"i", "like", "food", ""}) - ts.AddTriple(quad.Quad{"food", "is", "good", ""}) + w, _ := graph.NewQuadWriter("single", ts, nil) + w.AddQuad(quad.Quad{"i", "like", "food", ""}) + w.AddQuad(quad.Quad{"food", "is", "good", ""}) query := "(\"i\"\n" + "(:like\n" + "($a (:is :good))))" @@ -99,8 +102,9 @@ func TestTreeConstraintParse(t *testing.T) { func TestTreeConstraintTagParse(t *testing.T) { ts, _ := graph.NewTripleStore("memstore", "", nil) - ts.AddTriple(quad.Quad{"i", "like", "food", ""}) - ts.AddTriple(quad.Quad{"food", "is", "good", ""}) + w, _ := graph.NewQuadWriter("single", ts, nil) + w.AddQuad(quad.Quad{"i", "like", "food", ""}) + w.AddQuad(quad.Quad{"food", "is", "good", ""}) query := "(\"i\"\n" + "(:like\n" + "($a (:is :good))))" @@ -118,12 +122,13 @@ func TestTreeConstraintTagParse(t *testing.T) { func TestMultipleConstraintParse(t *testing.T) { ts, _ := graph.NewTripleStore("memstore", "", nil) + w, _ := graph.NewQuadWriter("single", ts, nil) for _, tv := range []quad.Quad{ {"i", "like", "food", ""}, {"i", "like", "beer", ""}, {"you", "like", "beer", ""}, } { - ts.AddTriple(tv) + w.AddQuad(tv) } query := `( $a diff --git a/writer/single.go b/writer/single.go new file mode 100644 index 0000000..d8c3382 --- /dev/null +++ b/writer/single.go @@ -0,0 +1,91 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package writer + +import ( + "sync" + "time" + + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +func init() { + graph.RegisterWriter("single", NewSingleReplication) +} + +type Single struct { + nextID int64 + ts graph.TripleStore + mut sync.Mutex +} + +func NewSingleReplication(ts graph.TripleStore, opts graph.Options) (graph.QuadWriter, error) { + horizon := ts.Horizon() + rep := &Single{nextID: horizon + 1, ts: ts} + if horizon <= 0 { + rep.nextID = 1 + } + return rep, nil +} + +func (s *Single) AcquireNextID() int64 { + s.mut.Lock() + defer s.mut.Unlock() + id := s.nextID + s.nextID++ + return id +} + +func (s *Single) AddQuad(q quad.Quad) error { + deltas := make([]graph.Delta, 1) + deltas[0] = graph.Delta{ + ID: s.AcquireNextID(), + Quad: q, + Action: graph.Add, + Timestamp: time.Now(), + } + return s.ts.ApplyDeltas(deltas) +} + +func (s *Single) AddQuadSet(set []quad.Quad) error { + deltas := make([]graph.Delta, len(set)) + for i, q := range set { + deltas[i] = graph.Delta{ + ID: s.AcquireNextID(), + Quad: q, + Action: graph.Add, + Timestamp: time.Now(), + } + } + s.ts.ApplyDeltas(deltas) + return nil +} + +func (s *Single) RemoveQuad(q quad.Quad) error { + deltas := make([]graph.Delta, 1) + deltas[0] = graph.Delta{ + ID: s.AcquireNextID(), + Quad: q, + Action: graph.Delete, + Timestamp: time.Now(), + } + return s.ts.ApplyDeltas(deltas) +} + +func (s *Single) Close() error { + // Nothing to clean up locally. + return nil +}