Documentation and tests

This commit is contained in:
Barak Michener 2014-08-23 18:05:04 -04:00
parent e2debf5f04
commit d0fcdf4299
6 changed files with 59 additions and 15 deletions

View file

@ -27,8 +27,9 @@ Its goal is to be a part of the developer's toolbox where [Linked Data](http://l
* JavaScript, with a [Gremlin](http://gremlindocs.com/)-inspired\* graph object.
* (simplified) [MQL](https://developers.google.com/freebase/v1/mql-overview), for Freebase fans
* Plays well with multiple backend stores:
* [LevelDB](http://code.google.com/p/leveldb/) for single-machine storage
* [MongoDB](http://mongodb.org)
* [LevelDB](http://code.google.com/p/leveldb/)
* [Bolt](http://github.com/boltdb/bolt)
* [MongoDB](http://mongodb.org) for distributed stores
* In-memory, ephemeral
* Modular design; easy to extend with new languages and backends
* Good test coverage

View file

@ -18,8 +18,10 @@ import (
"bytes"
"compress/bzip2"
"compress/gzip"
"flag"
"fmt"
"io"
"os"
"reflect"
"sort"
"strings"
@ -33,6 +35,8 @@ import (
"github.com/google/cayley/query/gremlin"
)
// backend names the graph store the tests run against. prepare accepts
// "memstore", "leveldb" and "bolt", and fails the test for any other value.
var backend = flag.String("backend", "memstore", "Which backend to test. Loads test data to /tmp if not present.")
var benchmarkQueries = []struct {
message string
long bool
@ -378,15 +382,42 @@ var (
)
func prepare(t testing.TB) {
switch *backend {
case "memstore":
break
case "leveldb":
fallthrough
case "bolt":
cfg.DatabaseType = *backend
cfg.DatabasePath = fmt.Sprint("/tmp/cayley_test_", *backend)
cfg.DatabaseOptions = map[string]interface{}{
"nosync": true, // It's a test. If we need to load, do it fast.
}
default:
t.Fatalf("Untestable backend store %s", *backend)
}
var err error
create.Do(func() {
needsLoad := true
if graph.IsPersistent(cfg.DatabaseType) {
if _, err := os.Stat(cfg.DatabasePath); os.IsNotExist(err) {
err = db.Init(cfg)
if err != nil {
t.Fatalf("Could not initialize database: %v", err)
}
} else {
needsLoad = false
}
}
handle, err = db.Open(cfg)
if err != nil {
t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err)
}
if !graph.IsPersistent(cfg.DatabaseType) {
err = load(handle.QuadWriter, cfg, "", "cquad")
if needsLoad {
err = load(handle.QuadWriter, cfg, "30kmoviedata.nq.gz", "cquad")
if err != nil {
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
}

View file

@ -23,7 +23,8 @@ All command line flags take precedence over the configuration file.
* `mem`: An in-memory store, based on an initial N-Quads file. Loses all changes when the process exits.
* `leveldb`: A persistent on-disk store backed by [LevelDB](http://code.google.com/p/leveldb/).
* `mongodb`: Stores the graph data and indices in a [MongoDB](http://mongodb.org) instance. Slower, as it incurs network traffic, but multiple Cayley instances can disappear and reconnect at will, across a potentially horizontally-scaled store.
* `bolt`: Stores the graph data on-disk in a [Bolt](http://github.com/boltdb/bolt) file. Uses more disk space and memory than LevelDB for smaller stores, but is often faster to write to and comparable for large ones, with faster average query times.
* `mongo`: Stores the graph data and indices in a [MongoDB](http://mongodb.org) instance. Slower, as it incurs network traffic, but multiple Cayley instances can disappear and reconnect at will, across a potentially horizontally-scaled store.
#### **`db_path`**
@ -32,9 +33,10 @@ All command line flags take precedence over the configuration file.
Where does the database actually live? This depends on the type of database. For each datastore:
* `mem`: Path to a triple file to automatically load
* `leveldb`: Directory to hold the LevelDB database files
* `mongodb`: "hostname:port" of the desired MongoDB server.
* `mem`: Path to a triple file to automatically load.
* `leveldb`: Directory to hold the LevelDB database files.
* `bolt`: Path to the persistent single Bolt database file.
* `mongo`: "hostname:port" of the desired MongoDB server.
#### **`listen_host`**
@ -103,8 +105,16 @@ The size in MiB of the LevelDB write cache. Increasing this number allows for mo
The size in MiB of the LevelDB block cache. Increasing this number uses more memory to maintain a bigger cache of triple blocks for better performance.
### Bolt
### MongoDB
#### **`nosync`**
* Type: Boolean
* Default: false
Optionally disables syncing to disk after each transaction. Setting `nosync` to true gives much faster load times, but without consistency guarantees.
### Mongo
#### **`database_name`**

View file

@ -17,7 +17,8 @@ You can set up a full [configuration file](/docs/Configuration) if you'd prefer,
Examples for each backend:
* `leveldb`: `./cayley init --db=leveldb --dbpath=/tmp/moviedb` -- where /tmp/moviedb is the path you'd like to store your data.
* `mongodb`: `./cayley init --db=mongodb --dbpath="<HOSTNAME>:<PORT>"` -- where HOSTNAME and PORT point to your Mongo instance.
* `bolt`: `./cayley init --db=bolt --dbpath=/tmp/moviedb` -- where /tmp/moviedb is the filename where you'd like to store your data.
* `mongo`: `./cayley init --db=mongo --dbpath="<HOSTNAME>:<PORT>"` -- where HOSTNAME and PORT point to your Mongo instance.
Those two options (db and dbpath) are always going to be present. If you would rather not repeat them on every invocation, now is a good time to set up a configuration file for your backend. There's an example file, `cayley.cfg.example`, in the root directory.

View file

@ -17,6 +17,7 @@ package bolt
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"strings"
@ -31,6 +32,7 @@ import (
var (
boltType graph.Type
bufferSize = 50
// errNotExist is a package-private sentinel returned from within
// Iterator.Next; Next skips its glog.Error call when it sees this error.
errNotExist = errors.New("Quad does not exist")
)
func init() {
@ -141,7 +143,7 @@ func (it *Iterator) Next() bool {
i++
} else {
it.buffer = append(it.buffer, nil)
return quad.ErrNotExist
return errNotExist
}
} else {
k, _ := cur.Seek(last)
@ -167,7 +169,7 @@ func (it *Iterator) Next() bool {
return nil
})
if err != nil {
if err != quad.ErrNotExist {
if err != errNotExist {
glog.Error("Error nexting in database: ", err)
}
it.done = true

View file

@ -44,7 +44,6 @@ import (
var (
ErrInvalid = errors.New("invalid N-Quad")
ErrIncomplete = errors.New("incomplete N-Quad")
ErrNotExist = errors.New("Quad does not exist")
)
// Our triple struct, used throughout.