diff --git a/README.md b/README.md index 65c0faa..8d7752c 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,9 @@ Its goal is to be a part of the developer's toolbox where [Linked Data](http://l * JavaScript, with a [Gremlin](http://gremlindocs.com/)-inspired\* graph object. * (simplified) [MQL](https://developers.google.com/freebase/v1/mql-overview), for Freebase fans * Plays well with multiple backend stores: - * [LevelDB](http://code.google.com/p/leveldb/) for single-machine storage - * [MongoDB](http://mongodb.org) + * [LevelDB](http://code.google.com/p/leveldb/) + * [Bolt](http://github.com/boltdb/bolt) + * [MongoDB](http://mongodb.org) for distributed stores * In-memory, ephemeral * Modular design; easy to extend with new languages and backends * Good test coverage diff --git a/cayley_test.go b/cayley_test.go index 02d4f4a..d8d15dd 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -18,8 +18,10 @@ import ( "bytes" "compress/bzip2" "compress/gzip" + "flag" "fmt" "io" + "os" "reflect" "sort" "strings" @@ -33,6 +35,8 @@ import ( "github.com/google/cayley/query/gremlin" ) +var backend = flag.String("backend", "memstore", "Which backend to test. Loads test data to /tmp if not present.") + var benchmarkQueries = []struct { message string long bool @@ -378,15 +382,42 @@ var ( ) func prepare(t testing.TB) { + switch *backend { + case "memstore": + break + case "leveldb": + fallthrough + case "bolt": + cfg.DatabaseType = *backend + cfg.DatabasePath = fmt.Sprint("/tmp/cayley_test_", *backend) + cfg.DatabaseOptions = map[string]interface{}{ + "nosync": true, // It's a test. If we need to load, do it fast. 
+ } + default: + t.Fatalf("Untestable backend store %s", *backend) + } + var err error create.Do(func() { + needsLoad := true + if graph.IsPersistent(cfg.DatabaseType) { + if _, err := os.Stat(cfg.DatabasePath); os.IsNotExist(err) { + err = db.Init(cfg) + if err != nil { + t.Fatalf("Could not initialize database: %v", err) + } + } else { + needsLoad = false + } + } + handle, err = db.Open(cfg) if err != nil { t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) } - if !graph.IsPersistent(cfg.DatabaseType) { - err = load(handle.QuadWriter, cfg, "", "cquad") + if needsLoad { + err = load(handle.QuadWriter, cfg, "30kmoviedata.nq.gz", "cquad") if err != nil { t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) } diff --git a/docs/Configuration.md b/docs/Configuration.md index 4b6b898..bd3bf11 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -23,7 +23,8 @@ All command line flags take precedence over the configuration file. * `mem`: An in-memory store, based on an initial N-Quads file. Loses all changes when the process exits. * `leveldb`: A persistent on-disk store backed by [LevelDB](http://code.google.com/p/leveldb/). - * `mongodb`: Stores the graph data and indices in a [MongoDB](http://mongodb.org) instance. Slower, as it incurs network traffic, but multiple Cayley instances can disappear and reconnect at will, across a potentially horizontally-scaled store. + * `bolt`: Stores the graph data on-disk in a [Bolt](http://github.com/boltdb/bolt) file. Uses more disk space and memory than LevelDB for smaller stores, but is often faster to write to and comparable for large ones, with faster average query times. + * `mongo`: Stores the graph data and indices in a [MongoDB](http://mongodb.org) instance. Slower, as it incurs network traffic, but multiple Cayley instances can disappear and reconnect at will, across a potentially horizontally-scaled store. 
#### **`db_path`** @@ -32,9 +33,10 @@ All command line flags take precedence over the configuration file. Where does the database actually live? Dependent on the type of database. For each datastore: - * `mem`: Path to a triple file to automatically load - * `leveldb`: Directory to hold the LevelDB database files - * `mongodb`: "hostname:port" of the desired MongoDB server. + * `mem`: Path to a triple file to automatically load. + * `leveldb`: Directory to hold the LevelDB database files. + * `bolt`: Path to the persistent single Bolt database file. + * `mongo`: "hostname:port" of the desired MongoDB server. #### **`listen_host`** @@ -103,8 +105,16 @@ The size in MiB of the LevelDB write cache. Increasing this number allows for mo The size in MiB of the LevelDB block cache. Increasing this number uses more memory to maintain a bigger cache of triple blocks for better performance. +### Bolt -### MongoDB +#### **`nosync`** + + * Type: Boolean + * Default: false + +Optionally disable syncing to disk per transaction. Nosync being true means much faster load times, but without consistency guarantees. + +### Mongo #### **`database_name`** diff --git a/docs/Overview.md b/docs/Overview.md index 300d005..39ded35 100644 --- a/docs/Overview.md +++ b/docs/Overview.md @@ -17,7 +17,8 @@ You can set up a full [configuration file](/docs/Configuration) if you'd prefer, Examples for each backend: * `leveldb`: `./cayley init --db=leveldb --dbpath=/tmp/moviedb` -- where /tmp/moviedb is the path you'd like to store your data. - * `mongodb`: `./cayley init --db=mongodb --dbpath="<HOSTNAME>:<PORT>"` -- where HOSTNAME and PORT point to your Mongo instance. + * `bolt`: `./cayley init --db=bolt --dbpath=/tmp/moviedb` -- where /tmp/moviedb is the filename where you'd like to store your data. + * `mongo`: `./cayley init --db=mongo --dbpath="<HOSTNAME>:<PORT>"` -- where HOSTNAME and PORT point to your Mongo instance. Those two options (db and dbpath) are always going to be present. 
If you feel like not repeating yourself, setting up a configuration file for your backend might be something to do now. There's an example file, `cayley.cfg.example` in the root directory. diff --git a/graph/bolt/iterator.go b/graph/bolt/iterator.go index 07c07bb..a53f6ed 100644 --- a/graph/bolt/iterator.go +++ b/graph/bolt/iterator.go @@ -17,6 +17,7 @@ package bolt import ( "bytes" "encoding/json" + "errors" "fmt" "strings" @@ -29,8 +30,9 @@ import ( ) var ( - boltType graph.Type - bufferSize = 50 + boltType graph.Type + bufferSize = 50 + errNotExist = errors.New("Quad does not exist") ) func init() { @@ -141,7 +143,7 @@ func (it *Iterator) Next() bool { i++ } else { it.buffer = append(it.buffer, nil) - return quad.ErrNotExist + return errNotExist } } else { k, _ := cur.Seek(last) @@ -167,7 +169,7 @@ func (it *Iterator) Next() bool { return nil }) if err != nil { - if err != quad.ErrNotExist { + if err != errNotExist { glog.Error("Error nexting in database: ", err) } it.done = true diff --git a/quad/quad.go b/quad/quad.go index 4928040..eaf7d98 100644 --- a/quad/quad.go +++ b/quad/quad.go @@ -44,7 +44,6 @@ import ( var ( ErrInvalid = errors.New("invalid N-Quad") ErrIncomplete = errors.New("incomplete N-Quad") - ErrNotExist = errors.New("Quad does not exist") ) // Our triple struct, used throughout.