Merge pull request #105 from kortschak/persistence

Handle persistence and quad format in cayley
This commit is contained in:
Barak Michener 2014-08-08 17:00:28 -04:00
commit 166ebb1a3a
11 changed files with 326 additions and 294 deletions

140
cayley.go
View file

@ -17,9 +17,17 @@
package main package main
import ( import (
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
"flag" "flag"
"fmt" "fmt"
"io"
client "net/http"
"net/url"
"os" "os"
"path/filepath"
"runtime" "runtime"
"github.com/barakmich/glog" "github.com/barakmich/glog"
@ -28,6 +36,9 @@ import (
"github.com/google/cayley/db" "github.com/google/cayley/db"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/http" "github.com/google/cayley/http"
"github.com/google/cayley/quad"
"github.com/google/cayley/quad/cquads"
"github.com/google/cayley/quad/nquads"
// Load all supported backends. // Load all supported backends.
_ "github.com/google/cayley/graph/leveldb" _ "github.com/google/cayley/graph/leveldb"
@ -35,14 +46,19 @@ import (
_ "github.com/google/cayley/graph/mongo" _ "github.com/google/cayley/graph/mongo"
) )
var tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") var (
var cpuprofile = flag.String("prof", "", "Output profiling file.") tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.")
var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") tripleType = flag.String("format", "cquad", `Triple format to use for loading ("cquad" or "nquad").`)
var configFile = flag.String("config", "", "Path to an explicit configuration file.") cpuprofile = flag.String("prof", "", "Output profiling file.")
queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.")
configFile = flag.String("config", "", "Path to an explicit configuration file.")
)
// Filled in by `go build ldflags="-X main.VERSION `ver`"`. // Filled in by `go build ldflags="-X main.VERSION `ver`"`.
var BUILD_DATE string var (
var VERSION string BUILD_DATE string
VERSION string
)
func Usage() { func Usage() {
fmt.Println("Cayley is a graph store and graph query layer.") fmt.Println("Cayley is a graph store and graph query layer.")
@ -100,40 +116,140 @@ func main() {
fmt.Println("Cayley snapshot") fmt.Println("Cayley snapshot")
} }
os.Exit(0) os.Exit(0)
case "init": case "init":
err = db.Init(cfg, *tripleFile) err = db.Init(cfg)
if err != nil {
break
}
if *tripleFile != "" {
ts, err = db.Open(cfg)
if err != nil {
break
}
err = load(ts, cfg, *tripleFile, *tripleType)
if err != nil {
break
}
ts.Close()
}
case "load": case "load":
ts, err = db.Open(cfg) ts, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
err = db.Load(ts, cfg, *tripleFile) err = load(ts, cfg, "", *tripleType)
if err != nil { if err != nil {
break break
} }
ts.Close() ts.Close()
case "repl": case "repl":
ts, err = db.Open(cfg) ts, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
err = db.Repl(ts, *queryLanguage, cfg) if !graph.IsPersistent(cfg.DatabaseType) {
if err != nil { err = load(ts, cfg, "", *tripleType)
break if err != nil {
break
}
} }
err = db.Repl(ts, *queryLanguage, cfg)
ts.Close() ts.Close()
case "http": case "http":
ts, err = db.Open(cfg) ts, err = db.Open(cfg)
if err != nil { if err != nil {
break break
} }
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", *tripleType)
if err != nil {
break
}
}
http.Serve(ts, cfg) http.Serve(ts, cfg)
ts.Close() ts.Close()
default: default:
fmt.Println("No command", cmd) fmt.Println("No command", cmd)
flag.Usage() flag.Usage()
} }
if err != nil { if err != nil {
glog.Fatalln(err) glog.Errorln(err)
}
}
// load reads quads from path into ts, decoding them with the parser selected
// by typ ("cquad" or "nquad").
//
// If path is empty, cfg.DatabasePath is used instead. A path that fails to
// parse as a URL, has no scheme, or uses the "file" scheme is opened from the
// local filesystem; any other path is fetched with an HTTP GET. The raw stream
// is passed through decompressor before decoding, and the resulting decoder is
// handed to db.Load.
//
// An error is returned if the source cannot be opened or fetched, if the
// server answers with a non-2xx status, if decompressor fails, if typ is not a
// known format, or if db.Load fails.
func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error {
	var r io.Reader

	if path == "" {
		path = cfg.DatabasePath
	}
	u, err := url.Parse(path)
	if err != nil || u.Scheme == "file" || u.Scheme == "" {
		// Don't alter relative URL path or non-URL path parameter.
		// url.Parse yields a nil *URL on failure, so err has to be tested
		// before u is dereferenced.
		if err == nil && u.Scheme != "" {
			// Recovery heuristic for mistyping "file://path/to/file".
			path = filepath.Join(u.Host, u.Path)
		}
		f, err := os.Open(path)
		if err != nil {
			return fmt.Errorf("could not open file %q: %v", path, err)
		}
		defer f.Close()
		r = f
	} else {
		res, err := client.Get(path)
		if err != nil {
			return fmt.Errorf("could not get resource <%s>: %v", u, err)
		}
		defer res.Body.Close()
		// Refuse error pages rather than feeding them to the quad parser.
		if res.StatusCode < 200 || res.StatusCode > 299 {
			return fmt.Errorf("could not get resource <%s>: %s", u, res.Status)
		}
		r = res.Body
	}

	r, err = decompressor(r)
	if err != nil {
		return err
	}

	var dec quad.Unmarshaler
	switch typ {
	case "cquad":
		dec = cquads.NewDecoder(r)
	case "nquad":
		dec = nquads.NewDecoder(r)
	default:
		return fmt.Errorf("unknown quad format %q", typ)
	}

	return db.Load(ts, cfg, dec)
}
// Leading bytes that identify a compressed stream.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// decompressor sniffs the first bytes of r and returns a reader that yields
// the uncompressed content: a gzip reader when the stream starts with the gzip
// magic, a bzip2 reader when it starts with "BZh", and otherwise a buffered
// reader over the unmodified data.
//
// Streams shorter than three bytes (including empty ones) are not an error;
// they are matched against whatever prefix is available. The error from
// gzip.NewReader is returned as-is when the gzip header is invalid.
func decompressor(r io.Reader) (io.Reader, error) {
	br := bufio.NewReader(r)
	// Peek hands back the bytes that are available together with io.EOF when
	// fewer than three exist, so io.EOF alone must not abort detection.
	buf, err := br.Peek(3)
	if err != nil && err != io.EOF {
		return nil, err
	}
	switch {
	case bytes.HasPrefix(buf, []byte(gzipMagic)):
		return gzip.NewReader(br)
	case bytes.HasPrefix(buf, []byte(b2zipMagic)):
		return bzip2.NewReader(br), nil
	default:
		return br, nil
	}
}

View file

@ -15,6 +15,11 @@
package main package main
import ( import (
"bytes"
"compress/bzip2"
"compress/gzip"
"io"
"strings"
"sync" "sync"
"testing" "testing"
"time" "time"
@ -308,6 +313,13 @@ func prepare(t testing.TB) {
if err != nil { if err != nil {
t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err)
} }
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(ts, cfg, "", "cquad")
if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
}
}
}) })
} }
@ -408,3 +420,84 @@ func BenchmarkKeanuOther(b *testing.B) {
func BenchmarkKeanuBullockOther(b *testing.B) { func BenchmarkKeanuBullockOther(b *testing.B) {
runBench(8, b) runBench(8, b)
} }
// reader is a test helper that hides every method of the wrapped io.Reader
// except Read.
type reader struct {
	r io.Reader
}

// Read delegates to the wrapped reader and reports its result unchanged.
func (rd reader) Read(p []byte) (n int, err error) {
	n, err = rd.r.Read(p)
	return n, err
}
// testDecompressor is the table of cases driven by TestDecompressor. Each
// entry supplies a stream for decompressor and the outcome expected from it.
var testDecompressor = []struct {
// message labels the case in failure output.
message string
// input is the stream handed to decompressor.
input io.Reader
// expect is compared with the bytes produced by the first Read on the
// reader that decompressor returns.
expect []byte
// err is the error expected from decompressor itself.
err error
// readErr is the error expected from the first Read on the returned reader.
readErr error
}{
{
// Plain text carries no magic prefix and is expected back unchanged.
message: "text input",
input: strings.NewReader("cayley data\n"),
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// Stream starting with the gzip magic; expected to decode to the same text.
message: "gzip input",
input: bytes.NewReader([]byte{
0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
}),
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// Stream starting with "BZh"; expected to decode to the same text.
message: "bzip2 input",
input: bytes.NewReader([]byte{
0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
0xa9, 0x7c, 0x78, 0x80,
}),
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// gzip magic followed by text: decompressor is expected to fail with
// gzip.ErrHeader before any read takes place.
message: "bad gzip input",
input: strings.NewReader("\x1f\x8bcayley data\n"),
err: gzip.ErrHeader,
expect: nil,
readErr: nil,
},
{
// "BZh" followed by text: decompressor is expected to succeed and the
// failure to appear on the first Read instead.
message: "bad bzip2 input",
input: strings.NewReader("\x42\x5a\x68cayley data\n"),
err: nil,
expect: nil,
readErr: bzip2.StructuralError("invalid compression level"),
},
}
// TestDecompressor runs every case in testDecompressor through decompressor.
// For each case it checks the error returned by decompressor and, when that
// error is nil, the error and bytes produced by a single Read on the returned
// reader.
func TestDecompressor(t *testing.T) {
	for _, test := range testDecompressor {
		r, err := decompressor(test.input)
		if err != test.err {
			t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
		}
		if err != nil {
			// Nothing to read from when construction failed as expected.
			continue
		}

		// Twice the expected length so that surplus output would be noticed.
		p := make([]byte, len(test.expect)*2)
		n, err := r.Read(p)
		if err != test.readErr {
			// Report the expected read error, not the construction error.
			t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.readErr)
		}
		if !bytes.Equal(p[:n], test.expect) {
			t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
		}
	}
}

View file

@ -15,46 +15,48 @@
package db package db
import ( import (
"bytes" "errors"
"compress/bzip2"
"compress/gzip"
"fmt" "fmt"
"io" "io"
"os"
"github.com/barakmich/glog" "github.com/barakmich/glog"
"github.com/google/cayley/config" "github.com/google/cayley/config"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
"github.com/google/cayley/quad" "github.com/google/cayley/quad"
"github.com/google/cayley/quad/cquads"
) )
func Load(ts graph.TripleStore, cfg *config.Config, path string) error { var ErrNotPersistent = errors.New("database type is not persistent")
f, err := os.Open(path)
if err != nil {
return fmt.Errorf("could not open file %q: %v", path, err)
}
defer f.Close()
r, err := decompressor(f) func Init(cfg *config.Config) error {
if err != nil { if !graph.IsPersistent(cfg.DatabaseType) {
glog.Fatalln(err) return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent)
} }
dec := cquads.NewDecoder(r) return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
}
func Open(cfg *config.Config) (graph.TripleStore, error) {
glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath)
ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
if err != nil {
return nil, err
}
return ts, nil
}
func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error {
bulker, canBulk := ts.(graph.BulkLoader) bulker, canBulk := ts.(graph.BulkLoader)
if canBulk { if canBulk {
err = bulker.BulkLoad(dec) switch err := bulker.BulkLoad(dec); err {
if err == nil { case nil:
return nil return nil
case graph.ErrCannotBulkLoad:
// Try individual loading.
default:
return err
} }
if err == graph.ErrCannotBulkLoad {
err = nil
}
}
if err != nil {
return err
} }
block := make([]quad.Quad, 0, cfg.LoadSize) block := make([]quad.Quad, 0, cfg.LoadSize)
@ -76,29 +78,3 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
return nil return nil
} }
// Leading bytes that identify a compressed stream.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// readAtReader is a stream that can be read sequentially and also inspected
// at an absolute offset.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the first bytes of r via ReadAt and returns a reader
// that yields the uncompressed content: a gzip reader for the gzip magic, a
// bzip2 reader for "BZh", and r itself otherwise.
//
// Inputs shorter than three bytes (including empty ones) are not an error;
// they are matched against the bytes that could be read. The error from
// gzip.NewReader is returned as-is when the gzip header is invalid.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	// ReadAt reports io.EOF together with a short count when fewer than three
	// bytes exist; only other errors should abort detection.
	n, err := r.ReadAt(buf[:], 0)
	if err != nil && err != io.EOF {
		return nil, err
	}
	head := buf[:n]
	switch {
	case bytes.HasPrefix(head, []byte(gzipMagic)):
		return gzip.NewReader(r)
	case bytes.HasPrefix(head, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}

View file

@ -1,39 +0,0 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package db
import (
"github.com/google/cayley/config"
"github.com/google/cayley/graph"
)
// Init initialises the store described by cfg through graph.InitTripleStore
// and, when triplePath is non-empty, opens the store and loads the file at
// triplePath into it.
//
// The first error from initialisation, Open or Load is returned. The store
// opened for loading is closed on every exit path.
func Init(cfg *config.Config, triplePath string) error {
	err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
	if err != nil {
		return err
	}
	if triplePath == "" {
		return nil
	}

	ts, err := Open(cfg)
	if err != nil {
		return err
	}
	// Deferred so that the store is released even when Load fails.
	defer ts.Close()

	return Load(ts, cfg, triplePath)
}

View file

@ -1,81 +0,0 @@
package db
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"testing"
)
// testDecompressor is the table of cases driven by TestDecompressor. Each
// entry supplies raw bytes for decompressor and the outcome expected from it.
var testDecompressor = []struct {
// message labels the case in failure output.
message string
// input is wrapped in a bytes.Reader and handed to decompressor.
input []byte
// expect is compared with the bytes produced by the first Read on the
// reader that decompressor returns.
expect []byte
// err is the error expected from decompressor itself.
err error
// readErr is the error expected from the first Read on the returned reader.
readErr error
}{
{
// Plain text carries no magic prefix and is expected back unchanged.
message: "text input",
input: []byte("cayley data\n"),
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// Stream starting with the gzip magic; expected to decode to the same text.
message: "gzip input",
input: []byte{
0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
},
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// Stream starting with "BZh"; expected to decode to the same text.
message: "bzip2 input",
input: []byte{
0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
0xa9, 0x7c, 0x78, 0x80,
},
err: nil,
expect: []byte("cayley data\n"),
readErr: nil,
},
{
// gzip magic followed by text: decompressor is expected to fail with
// gzip.ErrHeader before any read takes place.
message: "bad gzip input",
input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
err: gzip.ErrHeader,
expect: nil,
readErr: nil,
},
{
// "BZh" followed by text: decompressor is expected to succeed and the
// failure to appear on the first Read instead.
message: "bad bzip2 input",
input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
err: nil,
expect: nil,
readErr: bzip2.StructuralError("invalid compression level"),
},
}
// TestDecompressor runs every case in testDecompressor through decompressor,
// wrapping the raw input in a bytes.Reader. For each case it checks the error
// returned by decompressor and, when that error is nil, the error and bytes
// produced by a single Read on the returned reader.
func TestDecompressor(t *testing.T) {
	for _, test := range testDecompressor {
		buf := bytes.NewReader(test.input)
		r, err := decompressor(buf)
		if err != test.err {
			t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
		}
		if err != nil {
			// Nothing to read from when construction failed as expected.
			continue
		}

		// Twice the expected length so that surplus output would be noticed.
		p := make([]byte, len(test.expect)*2)
		n, err := r.Read(p)
		if err != test.readErr {
			// Report the expected read error, not the construction error.
			t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.readErr)
		}
		if !bytes.Equal(p[:n], test.expect) {
			t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
		}
	}
}

View file

@ -1,40 +0,0 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package db
import (
"github.com/barakmich/glog"
"github.com/google/cayley/config"
"github.com/google/cayley/graph"
)
// Open creates the triple store described by cfg via graph.NewTripleStore.
//
// When cfg.DatabaseType is "memstore" the store is additionally populated by
// calling Load with cfg.DatabasePath. If that load fails, the freshly created
// store is closed before the error is returned, so that no half-initialised
// store is left open. On any error the returned store is nil.
func Open(cfg *config.Config) (graph.TripleStore, error) {
	glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath)
	ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
	if err != nil {
		return nil, err
	}

	// Memstore is not persistent, so it MUST be loaded.
	if cfg.DatabaseType == "memstore" {
		if err := Load(ts, cfg, cfg.DatabasePath); err != nil {
			// The caller never receives ts on this path, so release it here.
			ts.Close()
			return nil, err
		}
	}

	return ts, nil
}

View file

@ -34,7 +34,7 @@ import (
) )
func init() { func init() {
graph.RegisterTripleStore("leveldb", newTripleStore, createNewLevelDB) graph.RegisterTripleStore("leveldb", true, newTripleStore, createNewLevelDB)
} }
const ( const (

View file

@ -26,7 +26,7 @@ import (
) )
func init() { func init() {
graph.RegisterTripleStore("memstore", func(string, graph.Options) (graph.TripleStore, error) { graph.RegisterTripleStore("memstore", false, func(string, graph.Options) (graph.TripleStore, error) {
return newTripleStore(), nil return newTripleStore(), nil
}, nil) }, nil)
} }

View file

@ -30,7 +30,7 @@ import (
) )
func init() { func init() {
graph.RegisterTripleStore("mongo", newTripleStore, createNewMongoGraph) graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph)
} }
// Guarantee we satisfy graph.Bulkloader. // Guarantee we satisfy graph.Bulkloader.

View file

@ -136,38 +136,45 @@ type BulkLoader interface {
type NewStoreFunc func(string, Options) (TripleStore, error) type NewStoreFunc func(string, Options) (TripleStore, error)
type InitStoreFunc func(string, Options) error type InitStoreFunc func(string, Options) error
var storeRegistry = make(map[string]NewStoreFunc) type register struct {
var storeInitRegistry = make(map[string]InitStoreFunc) newFunc NewStoreFunc
initFunc InitStoreFunc
isPersistent bool
}
func RegisterTripleStore(name string, newFunc NewStoreFunc, initFunc InitStoreFunc) { var storeRegistry = make(map[string]register)
func RegisterTripleStore(name string, persists bool, newFunc NewStoreFunc, initFunc InitStoreFunc) {
if _, found := storeRegistry[name]; found { if _, found := storeRegistry[name]; found {
panic("already registered TripleStore " + name) panic("already registered TripleStore " + name)
} }
storeRegistry[name] = newFunc storeRegistry[name] = register{
if initFunc != nil { newFunc: newFunc,
storeInitRegistry[name] = initFunc initFunc: initFunc,
isPersistent: persists,
} }
} }
func NewTripleStore(name, dbpath string, opts Options) (TripleStore, error) { func NewTripleStore(name, dbpath string, opts Options) (TripleStore, error) {
newFunc, hasNew := storeRegistry[name] r, registered := storeRegistry[name]
if !hasNew { if !registered {
return nil, errors.New("triplestore: name '" + name + "' is not registered") return nil, errors.New("triplestore: name '" + name + "' is not registered")
} }
return newFunc(dbpath, opts) return r.newFunc(dbpath, opts)
} }
func InitTripleStore(name, dbpath string, opts Options) error { func InitTripleStore(name, dbpath string, opts Options) error {
initFunc, hasInit := storeInitRegistry[name] r, registered := storeRegistry[name]
if hasInit { if registered {
return initFunc(dbpath, opts) return r.initFunc(dbpath, opts)
}
if _, isRegistered := storeRegistry[name]; isRegistered {
return nil
} }
return errors.New("triplestore: name '" + name + "' is not registered") return errors.New("triplestore: name '" + name + "' is not registered")
} }
func IsPersistent(name string) bool {
return storeRegistry[name].isPersistent
}
func TripleStores() []string { func TripleStores() []string {
t := make([]string, 0, len(storeRegistry)) t := make([]string, 0, len(storeRegistry))
for n := range storeRegistry { for n := range storeRegistry {