From 2d4c07b56d4b063919c7f72d4322e5dfff2399c1 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 08:36:49 +0930 Subject: [PATCH 1/7] Make db name-literal agnostic Move the persistence characteristic of the store into the graph registry and provide an API hook to get that information. Add error return for init on a non-persistent store. Updates #35. --- cayley.go | 2 +- db/init.go | 9 +++++++++ db/open.go | 3 +-- graph/leveldb/triplestore.go | 2 +- graph/memstore/triplestore.go | 2 +- graph/mongo/triplestore.go | 2 +- graph/triplestore.go | 37 ++++++++++++++++++++++--------------- 7 files changed, 36 insertions(+), 21 deletions(-) diff --git a/cayley.go b/cayley.go index c68d487..b6017ce 100644 --- a/cayley.go +++ b/cayley.go @@ -134,6 +134,6 @@ func main() { flag.Usage() } if err != nil { - glog.Fatalln(err) + glog.Errorln(err) } } diff --git a/db/init.go b/db/init.go index a791a8f..650a854 100644 --- a/db/init.go +++ b/db/init.go @@ -15,11 +15,20 @@ package db import ( + "errors" + "fmt" + "github.com/google/cayley/config" "github.com/google/cayley/graph" ) +var ErrNotPersistent = errors.New("database type is not persistent") + func Init(cfg *config.Config, triplePath string) error { + if !graph.IsPersistent(cfg.DatabaseType) { + return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) + } + err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) if err != nil { return err diff --git a/db/open.go b/db/open.go index 3e1ee24..cef8127 100644 --- a/db/open.go +++ b/db/open.go @@ -28,8 +28,7 @@ func Open(cfg *config.Config) (graph.TripleStore, error) { return nil, err } - // Memstore is not persistent, so it MUST be loaded. - if cfg.DatabaseType == "memstore" { + if !graph.IsPersistent(cfg.DatabaseType) { err = Load(ts, cfg, cfg.DatabasePath) if err != nil { return nil, err diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index 1a305d2..7efb03f 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -34,7 +34,7 @@ import ( ) func init() { - graph.RegisterTripleStore("leveldb", newTripleStore, createNewLevelDB) + graph.RegisterTripleStore("leveldb", true, newTripleStore, createNewLevelDB) } const ( diff --git a/graph/memstore/triplestore.go b/graph/memstore/triplestore.go index 056998c..3641a60 100644 --- a/graph/memstore/triplestore.go +++ b/graph/memstore/triplestore.go @@ -26,7 +26,7 @@ import ( ) func init() { - graph.RegisterTripleStore("memstore", func(string, graph.Options) (graph.TripleStore, error) { + graph.RegisterTripleStore("memstore", false, func(string, graph.Options) (graph.TripleStore, error) { return newTripleStore(), nil }, nil) } diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 20fea2f..364d195 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -30,7 +30,7 @@ import ( ) func init() { - graph.RegisterTripleStore("mongo", newTripleStore, createNewMongoGraph) + graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph) } // Guarantee we satisfy graph.Bulkloader. diff --git a/graph/triplestore.go b/graph/triplestore.go index b25ca74..9d45dd5 100644 --- a/graph/triplestore.go +++ b/graph/triplestore.go @@ -136,38 +136,45 @@ type BulkLoader interface { type NewStoreFunc func(string, Options) (TripleStore, error) type InitStoreFunc func(string, Options) error -var storeRegistry = make(map[string]NewStoreFunc) -var storeInitRegistry = make(map[string]InitStoreFunc) +type register struct { + newFunc NewStoreFunc + initFunc InitStoreFunc + isPersistent bool +} -func RegisterTripleStore(name string, newFunc NewStoreFunc, initFunc InitStoreFunc) { +var storeRegistry = make(map[string]register) + +func RegisterTripleStore(name string, persists bool, newFunc NewStoreFunc, initFunc InitStoreFunc) { if _, found := storeRegistry[name]; found { panic("already registered TripleStore " + name) } - storeRegistry[name] = newFunc - if initFunc != nil { - storeInitRegistry[name] = initFunc + storeRegistry[name] = register{ + newFunc: newFunc, + initFunc: initFunc, + isPersistent: persists, } } func NewTripleStore(name, dbpath string, opts Options) (TripleStore, error) { - newFunc, hasNew := storeRegistry[name] - if !hasNew { + r, registered := storeRegistry[name] + if !registered { return nil, errors.New("triplestore: name '" + name + "' is not registered") } - return newFunc(dbpath, opts) + return r.newFunc(dbpath, opts) } func InitTripleStore(name, dbpath string, opts Options) error { - initFunc, hasInit := storeInitRegistry[name] - if hasInit { - return initFunc(dbpath, opts) - } - if _, isRegistered := storeRegistry[name]; isRegistered { - return nil + r, registered := storeRegistry[name] + if registered { + return r.initFunc(dbpath, opts) } return errors.New("triplestore: name '" + name + "' is not registered") } +func IsPersistent(name string) bool { + return storeRegistry[name].isPersistent +} + func TripleStores() []string { t := make([]string, 0, len(storeRegistry)) for n := range storeRegistry { From 4c3f5109ebadd3450fb5cd4118bc9fc71046035b Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 09:31:02 +0930 Subject: [PATCH 2/7] Separate db.Open and db.Load Updates #82. --- cayley.go | 29 ++++++++++++++++++++++++----- cayley_test.go | 7 +++++++ db/open.go | 7 ------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/cayley.go b/cayley.go index b6017ce..a2a54dd 100644 --- a/cayley.go +++ b/cayley.go @@ -35,7 +35,7 @@ import ( _ "github.com/google/cayley/graph/mongo" ) -var tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") +var tripleFile = flag.String("triples", "", "Triple file to load for database init.") var cpuprofile = flag.String("prof", "", "Output profiling file.") var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") var configFile = flag.String("config", "", "Path to an explicit configuration file.") @@ -100,35 +100,54 @@ func main() { fmt.Println("Cayley snapshot") } os.Exit(0) + case "init": err = db.Init(cfg, *tripleFile) + case "load": ts, err = db.Open(cfg) if err != nil { break } - err = db.Load(ts, cfg, *tripleFile) + err = db.Load(ts, cfg, cfg.DatabasePath) if err != nil { break } + ts.Close() + case "repl": ts, err = db.Open(cfg) if err != nil { break } - err = db.Repl(ts, *queryLanguage, cfg) - if err != nil { - break + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + break + } } + + err = db.Repl(ts, *queryLanguage, cfg) + ts.Close() + case "http": ts, err = db.Open(cfg) if err != nil { break } + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + break + } + } + http.Serve(ts, cfg) + ts.Close() + default: fmt.Println("No command", cmd) flag.Usage() diff --git a/cayley_test.go b/cayley_test.go index 55ef3c2..e59ec53 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -308,6 +308,13 @@ func prepare(t testing.TB) { if err != nil { t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) } + + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) + } + } }) } diff --git a/db/open.go b/db/open.go index cef8127..bf1f2c7 100644 --- a/db/open.go +++ b/db/open.go @@ -28,12 +28,5 @@ func Open(cfg *config.Config) (graph.TripleStore, error) { return nil, err } - if !graph.IsPersistent(cfg.DatabaseType) { - err = Load(ts, cfg, cfg.DatabasePath) - if err != nil { - return nil, err - } - } - return ts, nil } From 979a0c4aeecb8a21d046ca635cb49ab02b9006a5 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 10:28:18 +0930 Subject: [PATCH 3/7] Allow optional strict N-Quad parsing This puts more of the logic in cayley, but other approaches require that db knows about quad formats. --- cayley.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++------ cayley_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++- db/init.go | 19 ++--------- db/load.go | 62 ++++------------------------------ db/load_test.go | 81 --------------------------------------------- 5 files changed, 177 insertions(+), 164 deletions(-) delete mode 100644 db/load_test.go diff --git a/cayley.go b/cayley.go index a2a54dd..fa0558e 100644 --- a/cayley.go +++ b/cayley.go @@ -17,8 +17,12 @@ package main import ( + "bytes" + "compress/bzip2" + "compress/gzip" "flag" "fmt" + "io" "os" "runtime" @@ -28,6 +32,9 @@ import ( "github.com/google/cayley/db" "github.com/google/cayley/graph" "github.com/google/cayley/http" + "github.com/google/cayley/quad" + "github.com/google/cayley/quad/cquads" + "github.com/google/cayley/quad/nquads" // Load all supported backends. _ "github.com/google/cayley/graph/leveldb" @@ -35,14 +42,19 @@ import ( _ "github.com/google/cayley/graph/mongo" ) -var tripleFile = flag.String("triples", "", "Triple file to load for database init.") -var cpuprofile = flag.String("prof", "", "Output profiling file.") -var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") -var configFile = flag.String("config", "", "Path to an explicit configuration file.") +var ( + tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") + tripleType = flag.String("format", "cquad", `Triple format to use for loading ("cquad" or "nquad").`) + cpuprofile = flag.String("prof", "", "Output profiling file.") + queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") + configFile = flag.String("config", "", "Path to an explicit configuration file.") +) // Filled in by `go build ldflags="-X main.VERSION `ver`"`. -var BUILD_DATE string -var VERSION string +var ( + BUILD_DATE string + VERSION string +) func Usage() { fmt.Println("Cayley is a graph store and graph query layer.") @@ -102,14 +114,28 @@ func main() { os.Exit(0) case "init": - err = db.Init(cfg, *tripleFile) + err = db.Init(cfg) + if err != nil { + break + } + if *tripleFile != "" { + ts, err = db.Open(cfg) + if err != nil { + break + } + err = load(ts, cfg, *tripleFile, *tripleType) + if err != nil { + break + } + ts.Close() + } case "load": ts, err = db.Open(cfg) if err != nil { break } - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -122,7 +148,7 @@ func main() { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -138,7 +164,7 @@ func main() { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -156,3 +182,58 @@ func main() { glog.Errorln(err) } } + +// TODO(kortschak) Make path a URI to allow pointing to any resource. +func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { + if path == "" { + path = cfg.DatabasePath + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("could not open file %q: %v", path, err) + } + defer f.Close() + + r, err := decompressor(f) + if err != nil { + return err + } + + var dec quad.Unmarshaler + switch typ { + case "cquad": + dec = cquads.NewDecoder(r) + case "nquad": + dec = nquads.NewDecoder(r) + default: + return fmt.Errorf("unknown quad format %q", typ) + } + + return db.Load(ts, cfg, dec) +} + +const ( + gzipMagic = "\x1f\x8b" + b2zipMagic = "BZh" +) + +type readAtReader interface { + io.Reader + io.ReaderAt +} + +func decompressor(r readAtReader) (io.Reader, error) { + var buf [3]byte + _, err := r.ReadAt(buf[:], 0) + if err != nil { + return nil, err + } + switch { + case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: + return gzip.NewReader(r) + case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: + return bzip2.NewReader(r), nil + default: + return r, nil + } +} diff --git a/cayley_test.go b/cayley_test.go index e59ec53..eba0720 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -15,6 +15,9 @@ package main import ( + "bytes" + "compress/bzip2" + "compress/gzip" "sync" "testing" "time" @@ -310,7 +313,7 @@ func prepare(t testing.TB) { } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", "cquad") if err != nil { t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) } @@ -415,3 +418,76 @@ func BenchmarkKeanuOther(b *testing.B) { func BenchmarkKeanuBullockOther(b *testing.B) { runBench(8, b) } + +var testDecompressor = []struct { + message string + input []byte + expect []byte + err error + readErr error +}{ + { + message: "text input", + input: []byte("cayley data\n"), + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "gzip input", + input: []byte{ + 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, + 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, + }, + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "bzip2 input", + input: []byte{ + 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, + 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, + 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, + 0xa9, 0x7c, 0x78, 0x80, + }, + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "bad gzip input", + input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + err: gzip.ErrHeader, + expect: nil, + readErr: nil, + }, + { + message: "bad bzip2 input", + input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + err: nil, + expect: nil, + readErr: bzip2.StructuralError("invalid compression level"), + }, +} + +func TestDecompressor(t *testing.T) { + for _, test := range testDecompressor { + buf := bytes.NewReader(test.input) + r, err := decompressor(buf) + if err != test.err { + t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) + } + if err != nil { + continue + } + p := make([]byte, len(test.expect)*2) + n, err := r.Read(p) + if err != test.readErr { + t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err) + } + if bytes.Compare(p[:n], test.expect) != 0 { + t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect) + } + } +} diff --git a/db/init.go b/db/init.go index 650a854..0f0887c 100644 --- a/db/init.go +++ b/db/init.go @@ -24,25 +24,10 @@ import ( var ErrNotPersistent = errors.New("database type is not persistent") -func Init(cfg *config.Config, triplePath string) error { +func Init(cfg *config.Config) error { if !graph.IsPersistent(cfg.DatabaseType) { return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) } - err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) - if err != nil { - return err - } - if triplePath != "" { - ts, err := Open(cfg) - if err != nil { - return err - } - err = Load(ts, cfg, triplePath) - if err != nil { - return err - } - ts.Close() - } - return err + return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) } diff --git a/db/load.go b/db/load.go index 9a3b069..2f0ab23 100644 --- a/db/load.go +++ b/db/load.go @@ -15,46 +15,24 @@ package db import ( - "bytes" - "compress/bzip2" - "compress/gzip" - "fmt" "io" - "os" - "github.com/barakmich/glog" "github.com/google/cayley/config" "github.com/google/cayley/graph" "github.com/google/cayley/quad" - "github.com/google/cayley/quad/cquads" ) -func Load(ts graph.TripleStore, cfg *config.Config, path string) error { - f, err := os.Open(path) - if err != nil { - return fmt.Errorf("could not open file %q: %v", path, err) - } - defer f.Close() - - r, err := decompressor(f) - if err != nil { - glog.Fatalln(err) - } - - dec := cquads.NewDecoder(r) - +func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { bulker, canBulk := ts.(graph.BulkLoader) if canBulk { - err = bulker.BulkLoad(dec) - if err == nil { + switch err := bulker.BulkLoad(dec); err { + case nil: return nil + case graph.ErrCannotBulkLoad: + // Try individual loading. + default: + return err } - if err == graph.ErrCannotBulkLoad { - err = nil - } - } - if err != nil { - return err } block := make([]quad.Quad, 0, cfg.LoadSize) @@ -76,29 +54,3 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error { return nil } - -const ( - gzipMagic = "\x1f\x8b" - b2zipMagic = "BZh" -) - -type readAtReader interface { - io.Reader - io.ReaderAt -} - -func decompressor(r readAtReader) (io.Reader, error) { - var buf [3]byte - _, err := r.ReadAt(buf[:], 0) - if err != nil { - return nil, err - } - switch { - case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: - return gzip.NewReader(r) - case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: - return bzip2.NewReader(r), nil - default: - return r, nil - } -} diff --git a/db/load_test.go b/db/load_test.go deleted file mode 100644 index 17ed6c5..0000000 --- a/db/load_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package db - -import ( - "bytes" - "compress/bzip2" - "compress/gzip" - "testing" -) - -var testDecompressor = []struct { - message string - input []byte - expect []byte - err error - readErr error -}{ - { - message: "text input", - input: []byte("cayley data\n"), - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "gzip input", - input: []byte{ - 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, - 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, - }, - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "bzip2 input", - input: []byte{ - 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, - 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, - 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, - 0xa9, 0x7c, 0x78, 0x80, - }, - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "bad gzip input", - input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, - err: gzip.ErrHeader, - expect: nil, - readErr: nil, - }, - { - message: "bad bzip2 input", - input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, - err: nil, - expect: nil, - readErr: bzip2.StructuralError("invalid compression level"), - }, -} - -func TestDecompressor(t *testing.T) { - for _, test := range testDecompressor { - buf := bytes.NewReader(test.input) - r, err := decompressor(buf) - if err != test.err { - t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) - } - if err != nil { - continue - } - p := make([]byte, len(test.expect)*2) - n, err := r.Read(p) - if err != test.readErr { - t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err) - } - if bytes.Compare(p[:n], test.expect) != 0 { - t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect) - } - } -} From 088e73a163f2024ff76a91d7ab2f92c57eda4533 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 10:32:58 +0930 Subject: [PATCH 4/7] Merge {init,open,load}.go into db.go The functions are too small to justify a single file each. The repl file is left out as semantically unrelated. --- db/db.go | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ db/init.go | 33 -------------------------- db/load.go | 56 ------------------------------------------- db/open.go | 32 ------------------------- 4 files changed, 80 insertions(+), 121 deletions(-) create mode 100644 db/db.go delete mode 100644 db/init.go delete mode 100644 db/load.go delete mode 100644 db/open.go diff --git a/db/db.go b/db/db.go new file mode 100644 index 0000000..8ea30db --- /dev/null +++ b/db/db.go @@ -0,0 +1,80 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package db + +import ( + "errors" + "fmt" + "io" + + "github.com/barakmich/glog" + + "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +var ErrNotPersistent = errors.New("database type is not persistent") + +func Init(cfg *config.Config) error { + if !graph.IsPersistent(cfg.DatabaseType) { + return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) + } + + return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) +} + +func Open(cfg *config.Config) (graph.TripleStore, error) { + glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) + ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) + if err != nil { + return nil, err + } + + return ts, nil +} + +func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { + bulker, canBulk := ts.(graph.BulkLoader) + if canBulk { + switch err := bulker.BulkLoad(dec); err { + case nil: + return nil + case graph.ErrCannotBulkLoad: + // Try individual loading. + default: + return err + } + } + + block := make([]quad.Quad, 0, cfg.LoadSize) + for { + t, err := dec.Unmarshal() + if err != nil { + if err == io.EOF { + break + } + return err + } + block = append(block, t) + if len(block) == cap(block) { + ts.AddTripleSet(block) + block = block[:0] + } + } + ts.AddTripleSet(block) + + return nil +} diff --git a/db/init.go b/db/init.go deleted file mode 100644 index 0f0887c..0000000 --- a/db/init.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "errors" - "fmt" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" -) - -var ErrNotPersistent = errors.New("database type is not persistent") - -func Init(cfg *config.Config) error { - if !graph.IsPersistent(cfg.DatabaseType) { - return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) - } - - return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) -} diff --git a/db/load.go b/db/load.go deleted file mode 100644 index 2f0ab23..0000000 --- a/db/load.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "io" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" - "github.com/google/cayley/quad" -) - -func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { - bulker, canBulk := ts.(graph.BulkLoader) - if canBulk { - switch err := bulker.BulkLoad(dec); err { - case nil: - return nil - case graph.ErrCannotBulkLoad: - // Try individual loading. - default: - return err - } - } - - block := make([]quad.Quad, 0, cfg.LoadSize) - for { - t, err := dec.Unmarshal() - if err != nil { - if err == io.EOF { - break - } - return err - } - block = append(block, t) - if len(block) == cap(block) { - ts.AddTripleSet(block) - block = block[:0] - } - } - ts.AddTripleSet(block) - - return nil -} diff --git a/db/open.go b/db/open.go deleted file mode 100644 index bf1f2c7..0000000 --- a/db/open.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "github.com/barakmich/glog" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" -) - -func Open(cfg *config.Config) (graph.TripleStore, error) { - glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) - ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) - if err != nil { - return nil, err - } - - return ts, nil -} From 844927ff1f4a692412031309fa58c04fd9f65208 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 21:27:17 +0930 Subject: [PATCH 5/7] Make decompressor conditional on reader interface --- cayley.go | 13 ++++++------- cayley_test.go | 37 +++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/cayley.go b/cayley.go index fa0558e..0c2efda 100644 --- a/cayley.go +++ b/cayley.go @@ -217,14 +217,13 @@ const ( b2zipMagic = "BZh" ) -type readAtReader interface { - io.Reader - io.ReaderAt -} - -func decompressor(r readAtReader) (io.Reader, error) { +func decompressor(r io.Reader) (io.Reader, error) { + ra, ok := r.(io.ReaderAt) + if !ok { + return r, nil + } var buf [3]byte - _, err := r.ReadAt(buf[:], 0) + _, err := ra.ReadAt(buf[:], 0) if err != nil { return nil, err } diff --git a/cayley_test.go b/cayley_test.go index eba0720..ce382b2 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -18,6 +18,8 @@ import ( "bytes" "compress/bzip2" "compress/gzip" + "io" + "strings" "sync" "testing" "time" @@ -419,62 +421,77 @@ func BenchmarkKeanuBullockOther(b *testing.B) { runBench(8, b) } +// reader is a test helper to filter non-io.Reader methods from the contained io.Reader. +type reader struct { + r io.Reader +} + +func (r reader) Read(p []byte) (int, error) { + return r.r.Read(p) +} + var testDecompressor = []struct { message string - input []byte + input io.Reader expect []byte err error readErr error }{ { message: "text input", - input: []byte("cayley data\n"), + input: strings.NewReader("cayley data\n"), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "gzip input", - input: []byte{ + input: bytes.NewReader([]byte{ 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, - }, + }), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "bzip2 input", - input: []byte{ + input: bytes.NewReader([]byte{ 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, 0xa9, 0x7c, 0x78, 0x80, - }, + }), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "bad gzip input", - input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + input: strings.NewReader("\x1f\x8bcayley data\n"), err: gzip.ErrHeader, expect: nil, readErr: nil, }, { message: "bad bzip2 input", - input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + input: strings.NewReader("\x42\x5a\x68cayley data\n"), err: nil, expect: nil, readErr: bzip2.StructuralError("invalid compression level"), }, + { + message: "gzip input without ReadAt", + input: reader{strings.NewReader("\x1f\x8bcayley data\n")}, + err: nil, + expect: []byte("\x1f\x8bcayley data\n"), + readErr: nil, + }, } func TestDecompressor(t *testing.T) { for _, test := range testDecompressor { - buf := bytes.NewReader(test.input) - r, err := decompressor(buf) + r, err := decompressor(test.input) if err != test.err { t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) } From 4844ef3e58796be9ddb311ee98a0ae09b8464285 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 22:19:06 +0930 Subject: [PATCH 6/7] Add support for remote data sources Now you can, for example: cayley http --dbpath="https://github.com/google/cayley/blob/master/30kmoviedata.nq.gz?raw=true" --- cayley.go | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/cayley.go b/cayley.go index 0c2efda..3d7a157 100644 --- a/cayley.go +++ b/cayley.go @@ -17,13 +17,17 @@ package main import ( + "bufio" "bytes" "compress/bzip2" "compress/gzip" "flag" "fmt" "io" + client "net/http" + "net/url" "os" + "path/filepath" "runtime" "github.com/barakmich/glog" @@ -183,18 +187,35 @@ func main() { } } -// TODO(kortschak) Make path a URI to allow pointing to any resource. func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { + var r io.Reader + if path == "" { path = cfg.DatabasePath } - f, err := os.Open(path) - if err != nil { - return fmt.Errorf("could not open file %q: %v", path, err) + u, err := url.Parse(path) + if err != nil || u.Scheme == "file" || u.Scheme == "" { + // Don't alter relative URL path or non-URL path parameter. + if u.Scheme != "" && err == nil { + // Recovery heuristic for mistyping "file://path/to/file". + path = filepath.Join(u.Host, u.Path) + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("could not open file %q: %v", path, err) + } + defer f.Close() + r = f + } else { + res, err := client.Get(path) + if err != nil { + return fmt.Errorf("could not get resource <%s>: %v", u, err) + } + defer res.Body.Close() + r = res.Body } - defer f.Close() - r, err := decompressor(f) + r, err = decompressor(r) if err != nil { return err } @@ -218,21 +239,17 @@ const ( ) func decompressor(r io.Reader) (io.Reader, error) { - ra, ok := r.(io.ReaderAt) - if !ok { - return r, nil - } - var buf [3]byte - _, err := ra.ReadAt(buf[:], 0) + br := bufio.NewReader(r) + buf, err := br.Peek(3) if err != nil { return nil, err } switch { case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: - return gzip.NewReader(r) + return gzip.NewReader(br) case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: - return bzip2.NewReader(r), nil + return bzip2.NewReader(br), nil default: - return r, nil + return br, nil } } From 86bf7e9e6aad200a0850f5a92e35352b52700b97 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 22:21:42 +0930 Subject: [PATCH 7/7] Remove now-irrelevant failing test --- cayley_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cayley_test.go b/cayley_test.go index ce382b2..d108f85 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -480,13 +480,6 @@ var testDecompressor = []struct { expect: nil, readErr: bzip2.StructuralError("invalid compression level"), }, - { - message: "gzip input without ReadAt", - input: reader{strings.NewReader("\x1f\x8bcayley data\n")}, - err: nil, - expect: []byte("\x1f\x8bcayley data\n"), - readErr: nil, - }, } func TestDecompressor(t *testing.T) {