Allow optional strict N-Quad parsing
This puts more of the logic in cayley, but other approaches require that db knows about quad formats.
This commit is contained in:
parent
4c3f5109eb
commit
979a0c4aee
5 changed files with 177 additions and 164 deletions
101
cayley.go
101
cayley.go
|
|
@ -17,8 +17,12 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
|
|
@ -28,6 +32,9 @@ import (
|
|||
"github.com/google/cayley/db"
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/http"
|
||||
"github.com/google/cayley/quad"
|
||||
"github.com/google/cayley/quad/cquads"
|
||||
"github.com/google/cayley/quad/nquads"
|
||||
|
||||
// Load all supported backends.
|
||||
_ "github.com/google/cayley/graph/leveldb"
|
||||
|
|
@ -35,14 +42,19 @@ import (
|
|||
_ "github.com/google/cayley/graph/mongo"
|
||||
)
|
||||
|
||||
var tripleFile = flag.String("triples", "", "Triple file to load for database init.")
|
||||
var cpuprofile = flag.String("prof", "", "Output profiling file.")
|
||||
var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.")
|
||||
var configFile = flag.String("config", "", "Path to an explicit configuration file.")
|
||||
var (
|
||||
tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.")
|
||||
tripleType = flag.String("format", "cquad", `Triple format to use for loading ("cquad" or "nquad").`)
|
||||
cpuprofile = flag.String("prof", "", "Output profiling file.")
|
||||
queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.")
|
||||
configFile = flag.String("config", "", "Path to an explicit configuration file.")
|
||||
)
|
||||
|
||||
// Filled in by `go build -ldflags="-X main.VERSION `ver`"`.
|
||||
var BUILD_DATE string
|
||||
var VERSION string
|
||||
var (
|
||||
BUILD_DATE string
|
||||
VERSION string
|
||||
)
|
||||
|
||||
func Usage() {
|
||||
fmt.Println("Cayley is a graph store and graph query layer.")
|
||||
|
|
@ -102,14 +114,28 @@ func main() {
|
|||
os.Exit(0)
|
||||
|
||||
case "init":
|
||||
err = db.Init(cfg, *tripleFile)
|
||||
err = db.Init(cfg)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if *tripleFile != "" {
|
||||
ts, err = db.Open(cfg)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
err = load(ts, cfg, *tripleFile, *tripleType)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
ts.Close()
|
||||
}
|
||||
|
||||
case "load":
|
||||
ts, err = db.Open(cfg)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
err = db.Load(ts, cfg, cfg.DatabasePath)
|
||||
err = load(ts, cfg, "", *tripleType)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
|
@ -122,7 +148,7 @@ func main() {
|
|||
break
|
||||
}
|
||||
if !graph.IsPersistent(cfg.DatabaseType) {
|
||||
err = db.Load(ts, cfg, cfg.DatabasePath)
|
||||
err = load(ts, cfg, "", *tripleType)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
|
@ -138,7 +164,7 @@ func main() {
|
|||
break
|
||||
}
|
||||
if !graph.IsPersistent(cfg.DatabaseType) {
|
||||
err = db.Load(ts, cfg, cfg.DatabasePath)
|
||||
err = load(ts, cfg, "", *tripleType)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
|
@ -156,3 +182,58 @@ func main() {
|
|||
glog.Errorln(err)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(kortschak) Make path a URI to allow pointing to any resource.
|
||||
func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error {
|
||||
if path == "" {
|
||||
path = cfg.DatabasePath
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open file %q: %v", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r, err := decompressor(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var dec quad.Unmarshaler
|
||||
switch typ {
|
||||
case "cquad":
|
||||
dec = cquads.NewDecoder(r)
|
||||
case "nquad":
|
||||
dec = nquads.NewDecoder(r)
|
||||
default:
|
||||
return fmt.Errorf("unknown quad format %q", typ)
|
||||
}
|
||||
|
||||
return db.Load(ts, cfg, dec)
|
||||
}
|
||||
|
||||
// Magic byte sequences that decompressor looks for at the start of an
// input to recognise a compressed stream.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// readAtReader is an io.Reader whose leading bytes can also be inspected
// at an absolute offset through io.ReaderAt.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the first bytes of r and returns a reader that
// yields the decompressed content when r starts with a gzip or bzip2
// magic number, or r itself otherwise.
//
// The peek is done with ReadAt at offset 0 and the returned reader then
// reads from r via Read. Inputs shorter than the longest magic number
// (including empty inputs) are treated as uncompressed rather than
// rejected. An error is returned if the peek fails for a reason other
// than reaching the end of the input, or if the gzip header is invalid;
// in both cases the returned reader is nil.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	n, err := r.ReadAt(buf[:], 0)
	// ReadAt reports io.EOF whenever fewer than len(buf) bytes are
	// available; that only means the input is short, not that it is bad.
	if err != nil && err != io.EOF {
		return nil, err
	}
	magic := buf[:n]

	switch {
	case bytes.HasPrefix(magic, []byte(gzipMagic)):
		zr, err := gzip.NewReader(r)
		if err != nil {
			// Return a literal nil so callers never see a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(magic, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
|
@ -310,7 +313,7 @@ func prepare(t testing.TB) {
|
|||
}
|
||||
|
||||
if !graph.IsPersistent(cfg.DatabaseType) {
|
||||
err = db.Load(ts, cfg, cfg.DatabasePath)
|
||||
err = load(ts, cfg, "", "cquad")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err)
|
||||
}
|
||||
|
|
@ -415,3 +418,76 @@ func BenchmarkKeanuOther(b *testing.B) {
|
|||
func BenchmarkKeanuBullockOther(b *testing.B) {
|
||||
runBench(8, b)
|
||||
}
|
||||
|
||||
var testDecompressor = []struct {
|
||||
message string
|
||||
input []byte
|
||||
expect []byte
|
||||
err error
|
||||
readErr error
|
||||
}{
|
||||
{
|
||||
message: "text input",
|
||||
input: []byte("cayley data\n"),
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "gzip input",
|
||||
input: []byte{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
|
||||
0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
|
||||
},
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bzip2 input",
|
||||
input: []byte{
|
||||
0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
|
||||
0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
|
||||
0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
|
||||
0xa9, 0x7c, 0x78, 0x80,
|
||||
},
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bad gzip input",
|
||||
input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
|
||||
err: gzip.ErrHeader,
|
||||
expect: nil,
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bad bzip2 input",
|
||||
input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
|
||||
err: nil,
|
||||
expect: nil,
|
||||
readErr: bzip2.StructuralError("invalid compression level"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestDecompressor(t *testing.T) {
|
||||
for _, test := range testDecompressor {
|
||||
buf := bytes.NewReader(test.input)
|
||||
r, err := decompressor(buf)
|
||||
if err != test.err {
|
||||
t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
p := make([]byte, len(test.expect)*2)
|
||||
n, err := r.Read(p)
|
||||
if err != test.readErr {
|
||||
t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if bytes.Compare(p[:n], test.expect) != 0 {
|
||||
t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
19
db/init.go
19
db/init.go
|
|
@ -24,25 +24,10 @@ import (
|
|||
|
||||
var ErrNotPersistent = errors.New("database type is not persistent")
|
||||
|
||||
func Init(cfg *config.Config, triplePath string) error {
|
||||
func Init(cfg *config.Config) error {
|
||||
if !graph.IsPersistent(cfg.DatabaseType) {
|
||||
return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent)
|
||||
}
|
||||
|
||||
err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if triplePath != "" {
|
||||
ts, err := Open(cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = Load(ts, cfg, triplePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ts.Close()
|
||||
}
|
||||
return err
|
||||
return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
||||
}
|
||||
|
|
|
|||
62
db/load.go
62
db/load.go
|
|
@ -15,46 +15,24 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
"github.com/google/cayley/config"
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/quad"
|
||||
"github.com/google/cayley/quad/cquads"
|
||||
)
|
||||
|
||||
func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open file %q: %v", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r, err := decompressor(f)
|
||||
if err != nil {
|
||||
glog.Fatalln(err)
|
||||
}
|
||||
|
||||
dec := cquads.NewDecoder(r)
|
||||
|
||||
func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error {
|
||||
bulker, canBulk := ts.(graph.BulkLoader)
|
||||
if canBulk {
|
||||
err = bulker.BulkLoad(dec)
|
||||
if err == nil {
|
||||
switch err := bulker.BulkLoad(dec); err {
|
||||
case nil:
|
||||
return nil
|
||||
case graph.ErrCannotBulkLoad:
|
||||
// Try individual loading.
|
||||
default:
|
||||
return err
|
||||
}
|
||||
if err == graph.ErrCannotBulkLoad {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
block := make([]quad.Quad, 0, cfg.LoadSize)
|
||||
|
|
@ -76,29 +54,3 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Magic byte sequences that decompressor looks for at the start of an
// input to recognise a compressed stream.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// readAtReader is an io.Reader whose leading bytes can also be inspected
// at an absolute offset through io.ReaderAt.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the first bytes of r and returns a reader that
// yields the decompressed content when r starts with a gzip or bzip2
// magic number, or r itself otherwise.
//
// The peek is done with ReadAt at offset 0 and the returned reader then
// reads from r via Read. Inputs shorter than the longest magic number
// (including empty inputs) are treated as uncompressed rather than
// rejected. An error is returned if the peek fails for a reason other
// than reaching the end of the input, or if the gzip header is invalid;
// in both cases the returned reader is nil.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	n, err := r.ReadAt(buf[:], 0)
	// ReadAt reports io.EOF whenever fewer than len(buf) bytes are
	// available; that only means the input is short, not that it is bad.
	if err != nil && err != io.EOF {
		return nil, err
	}
	magic := buf[:n]

	switch {
	case bytes.HasPrefix(magic, []byte(gzipMagic)):
		zr, err := gzip.NewReader(r)
		if err != nil {
			// Return a literal nil so callers never see a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(magic, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
|
||||
|
|
|
|||
|
|
@ -1,81 +0,0 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var testDecompressor = []struct {
|
||||
message string
|
||||
input []byte
|
||||
expect []byte
|
||||
err error
|
||||
readErr error
|
||||
}{
|
||||
{
|
||||
message: "text input",
|
||||
input: []byte("cayley data\n"),
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "gzip input",
|
||||
input: []byte{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
|
||||
0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
|
||||
},
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bzip2 input",
|
||||
input: []byte{
|
||||
0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
|
||||
0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
|
||||
0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
|
||||
0xa9, 0x7c, 0x78, 0x80,
|
||||
},
|
||||
err: nil,
|
||||
expect: []byte("cayley data\n"),
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bad gzip input",
|
||||
input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
|
||||
err: gzip.ErrHeader,
|
||||
expect: nil,
|
||||
readErr: nil,
|
||||
},
|
||||
{
|
||||
message: "bad bzip2 input",
|
||||
input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'},
|
||||
err: nil,
|
||||
expect: nil,
|
||||
readErr: bzip2.StructuralError("invalid compression level"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestDecompressor(t *testing.T) {
|
||||
for _, test := range testDecompressor {
|
||||
buf := bytes.NewReader(test.input)
|
||||
r, err := decompressor(buf)
|
||||
if err != test.err {
|
||||
t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
p := make([]byte, len(test.expect)*2)
|
||||
n, err := r.Read(p)
|
||||
if err != test.readErr {
|
||||
t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if bytes.Compare(p[:n], test.expect) != 0 {
|
||||
t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue