From 0c3757d48e5c7a09ddc297d9bd6952d1a2b54a79 Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 01:40:33 -0400 Subject: [PATCH 01/11] initial addition for exporting functionality --- cmd/cayley/cayley.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cmd/cayley/cayley.go b/cmd/cayley/cayley.go index 77c6cde..6106de4 100644 --- a/cmd/cayley/cayley.go +++ b/cmd/cayley/cayley.go @@ -49,6 +49,8 @@ var ( configFile = flag.String("config", "", "Path to an explicit configuration file.") databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") databaseBackend = flag.String("db", "memstore", "Database Backend.") + dumpFile = flag.String("dump", "dbdump.nq", `Quad file to dump the database to (".gz" supported, "-" for stdout).`) + dumpType = flag.String("dump_type", "json", `Quad file format ("json", "nquad").`) replicationBackend = flag.String("replication", "single", "Replication method.") host = flag.String("host", "127.0.0.1", "Host to listen on (defaults to all).") loadSize = flag.Int("load_size", 10000, "Size of quadsets to load") @@ -72,6 +74,7 @@ Commands: init Create an empty database. load Bulk-load a quad file into the database. http Serve an HTTP endpoint on the given host and port. + dump Bulk-dump the database into a quad file. repl Drop into a REPL of the given query language. version Version information. @@ -204,6 +207,21 @@ func main() { handle.Close() + case "dump": + handle, err = db.Open(cfg) + if err != nil { + break + } + if !graph.IsPersistent(cfg.DatabaseType) { + err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType) + if err != nil { + break + } + } + + // internal.Dump() + handle.Close() + case "repl": handle, err = db.Open(cfg) if err != nil { From 85a1bbdf0e4c601b7c1910be704ad6a1210a2378 Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 01:57:25 -0400 Subject: [PATCH 02/11] exporter initial skeleton --- exporter/exporter.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 exporter/exporter.go diff --git a/exporter/exporter.go b/exporter/exporter.go new file mode 100644 index 0000000..cd93c0c --- /dev/null +++ b/exporter/exporter.go @@ -0,0 +1,49 @@ +package exporter + +import ( + "io" + "encoding/json" + + "github.com/google/cayley/graph" +) + +type Exporter struct { + wr io.Writer + qstore graph.QuadStore + err error + count int32 +} + +func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter { + return &Exporter{wr: writer, qstore: qstore} +} + +// number of records +func (exp *Exporter) Count() int32 { + return exp.count +} + +//print out the string quoted, escaped +func (exp *Exporter) WriteEscString(str string) { + var esc []byte + + if exp.err != nil { + return + } + esc, exp.err = json.Marshal(str) + if exp.err != nil { + return + } + _, exp.err = exp.wr.Write(esc) +} + +func (exp *Exporter) Write(str string) { + if exp.err != nil { + return + } + _, exp.err = exp.wr.Write([]byte(str)) +} + +func (exp *Exporter) Err() error { + return exp.err +} From 00b13d2ac171d17f399788805830c27e964d0b8f Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 01:58:25 -0400 Subject: [PATCH 03/11] internal dump routine --- cmd/cayley/cayley.go | 6 +++++- internal/dump.go | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 internal/dump.go diff --git a/cmd/cayley/cayley.go b/cmd/cayley/cayley.go index 6106de4..4411d53 100644 --- a/cmd/cayley/cayley.go +++ b/cmd/cayley/cayley.go @@ -219,7 +219,11 @@ func main() { } } - // internal.Dump() + err = internal.Dump(handle.QuadStore, *dumpFile, *dumpType) + if err != nil { + break + } + handle.Close() case "repl": diff --git a/internal/dump.go b/internal/dump.go new file mode 100644 index 0000000..ae5f4ef --- /dev/null +++ b/internal/dump.go @@ -0,0 +1,36 @@ +package internal + +import ( + "fmt" + "os" + + "github.com/google/cayley/graph" + "github.com/google/cayley/exporter" +) + +// Dump the content of the database into a file based +// on a few different formats +func Dump(qs graph.QuadStore, outFile, typ string) error { + var f *os.File + if outFile == "-" { + f = os.Stdout + } else { + var err error + f, err = os.Create(outFile) + if err != nil { + return fmt.Errorf("could not open file %q: %v", outFile, err) + } + defer f.Close() + fmt.Printf("dumping db to file %q\n", outFile) + } + + export := exporter.NewExporter(f, qs) + if export.Err() != nil { + return export.Err() + } + + if outFile != "-" { + fmt.Printf("%d entries were written\n", export.Count()) + } + return nil +} From 1e9c6990bbabcb23044441d346686e46aefdefa6 Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 02:14:10 -0400 Subject: [PATCH 04/11] json export --- exporter/exporter.go | 19 +++++++++++++++++++ internal/dump.go | 1 + 2 files changed, 20 insertions(+) diff --git a/exporter/exporter.go b/exporter/exporter.go index cd93c0c..8443df3 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -23,6 +23,25 @@ func (exp *Exporter) Count() int32 { return exp.count } +func (exp *Exporter) ExportJson() { + var jstr []byte + exp.Write("[") + it := exp.qstore.QuadsAllIterator() + for graph.Next(it) { + exp.count++ + if exp.count > 1 { + exp.Write(",") + } + + jstr, exp.err = json.Marshal(exp.qstore.Quad(it.Result())) + if exp.err != nil { + return + } + exp.Write(string(jstr[:])) + } + exp.Write("]\n") +} + //print out the string quoted, escaped func (exp *Exporter) WriteEscString(str string) { var esc []byte diff --git a/internal/dump.go b/internal/dump.go index ae5f4ef..bf443f9 100644 --- a/internal/dump.go +++ b/internal/dump.go @@ -28,6 +28,7 @@ func Dump(qs graph.QuadStore, outFile, typ string) error { if export.Err() != nil { return export.Err() } + export.ExportJson() if outFile != "-" { fmt.Printf("%d entries were written\n", export.Count()) From 76e4a5d15e1c077e4b69d4ac82b6b32206cde62c Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 02:28:39 -0400 Subject: [PATCH 05/11] export nquads --- exporter/exporter.go | 19 +++++++++++++++++++ internal/dump.go | 14 +++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/exporter/exporter.go b/exporter/exporter.go index 8443df3..07e56a9 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -23,6 +23,25 @@ func (exp *Exporter) Count() int32 { return exp.count } +func (exp *Exporter) ExportNquad() { + it := exp.qstore.QuadsAllIterator() + for graph.Next(it) { + exp.count++ + quad := exp.qstore.Quad(it.Result()) + + exp.WriteEscString(quad.Subject) + exp.Write(" ") + exp.WriteEscString(quad.Predicate) + exp.Write(" ") + exp.WriteEscString(quad.Object) + if quad.Label != "" { + exp.Write(" ") + exp.WriteEscString(quad.Label) + } + exp.Write(" .\n") + } +} + func (exp *Exporter) ExportJson() { var jstr []byte exp.Write("[") diff --git a/internal/dump.go b/internal/dump.go index bf443f9..e43fe3f 100644 --- a/internal/dump.go +++ b/internal/dump.go @@ -28,7 +28,19 @@ func Dump(qs graph.QuadStore, outFile, typ string) error { if export.Err() != nil { return export.Err() } - export.ExportJson() + + switch typ { + case "nquad": + export.ExportNquad() + case "json": + export.ExportJson() + default: + return fmt.Errorf("unknown format %q", typ) + } + + if export.Err() != nil { + return export.Err() + } if outFile != "-" { fmt.Printf("%d entries were written\n", export.Count()) From 99587c2f2499cb0ffa645d62e8d5c6c6db1ff3bb Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 02:38:15 -0400 Subject: [PATCH 06/11] added experimental types graphml/gml --- exporter/exporter.go | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ internal/dump.go | 5 +++ 2 files changed, 98 insertions(+) diff --git a/exporter/exporter.go b/exporter/exporter.go index 07e56a9..10fef10 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -3,6 +3,7 @@ package exporter import ( "io" "encoding/json" + "strconv" "github.com/google/cayley/graph" ) @@ -61,6 +62,98 @@ func (exp *Exporter) ExportJson() { exp.Write("]\n") } +//experimental +func (exp *Exporter) ExportGml() { + var seen map[string]int32 // todo eliminate this for large dbs + var id int32 + + exp.Write("Creator Cayley\ngraph\n[\n") + + seen = make(map[string]int32) + it := exp.qstore.QuadsAllIterator() + for graph.Next(it) { + cur := exp.qstore.Quad(it.Result()) + if _, ok := seen[cur.Subject]; !ok { + exp.Write(" node\n [\n id ") + seen[cur.Subject] = id + exp.Write(strconv.FormatInt(int64(id), 10)) + exp.Write("\n label ") + exp.WriteEscString(cur.Subject) + exp.Write("\n ]\n") + id++ + } + if _, ok := seen[cur.Object]; !ok { + exp.Write(" node\n [\n id ") + seen[cur.Object] = id + exp.Write(strconv.FormatInt(int64(id), 10)) + exp.Write("\n label ") + exp.WriteEscString(cur.Object) + exp.Write("\n ]\n") + id++ + } + exp.count++ + } + + it.Reset() + for graph.Next(it) { + cur := exp.qstore.Quad(it.Result()) + exp.Write(" edge\n [\n source ") + exp.Write(strconv.FormatInt(int64(seen[cur.Subject]), 10)) + exp.Write("\n target ") + exp.Write(strconv.FormatInt(int64(seen[cur.Object]), 10)) + exp.Write("\n label ") + exp.WriteEscString(cur.Predicate) + exp.Write("\n ]\n") + exp.count++ + } + exp.Write("]\n") +} + +//experimental +func (exp *Exporter) ExportGraphml() { + var seen map[string]bool // eliminate this for large databases + + exp.Write("\n") + exp.Write("\n") + exp.Write(" \n") + + seen = make(map[string]bool) + it := exp.qstore.QuadsAllIterator() + for graph.Next(it) { + cur := exp.qstore.Quad(it.Result()) + if found := seen[cur.Subject]; !found { + seen[cur.Subject] = true + exp.Write(" \n") + } + if found := seen[cur.Object]; !found { + seen[cur.Object] = true + exp.Write(" \n") + } + exp.count++ + } + + it.Reset() + for graph.Next(it) { + cur := exp.qstore.Quad(it.Result()) + exp.Write(" \n") + exp.Write(" ") + exp.Write(cur.Predicate) + exp.Write("\n \n") + exp.count++ + } + exp.Write(" \n\n"); +} + //print out the string quoted, escaped func (exp *Exporter) WriteEscString(str string) { var esc []byte diff --git a/internal/dump.go b/internal/dump.go index e43fe3f..e4674be 100644 --- a/internal/dump.go +++ b/internal/dump.go @@ -34,6 +34,11 @@ func Dump(qs graph.QuadStore, outFile, typ string) error { export.ExportNquad() case "json": export.ExportJson() + // gml/graphml experimental + case "gml": + export.ExportGml() + case "graphml": + export.ExportGraphml() default: return fmt.Errorf("unknown format %q", typ) } From 19f5b090e68f2fef69d8dd55be6dcd4ec88f45bc Mon Sep 17 00:00:00 2001 From: David Schor Date: Mon, 10 Aug 2015 02:44:19 -0400 Subject: [PATCH 07/11] add gzip support --- internal/dump.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/dump.go b/internal/dump.go index e4674be..2bdc799 100644 --- a/internal/dump.go +++ b/internal/dump.go @@ -3,6 +3,8 @@ package internal import ( "fmt" "os" + "compress/gzip" + "path/filepath" "github.com/google/cayley/graph" "github.com/google/cayley/exporter" @@ -24,11 +26,16 @@ func Dump(qs graph.QuadStore, outFile, typ string) error { fmt.Printf("dumping db to file %q\n", outFile) } - export := exporter.NewExporter(f, qs) - if export.Err() != nil { - return export.Err() + var export *exporter.Exporter + if filepath.Ext(outFile) == ".gz" { + gzip := gzip.NewWriter(f) + defer gzip.Close() + export = exporter.NewExporter(gzip, qs) + } else { + export = exporter.NewExporter(f, qs) } + //TODO: add possible support for exporting specific queries only switch typ { case "nquad": export.ExportNquad() From f1566ba182a7c0f731b57a8451b80a452da6eed3 Mon Sep 17 00:00:00 2001 From: David Schor Date: Tue, 11 Aug 2015 02:32:43 -0400 Subject: [PATCH 08/11] renamed nquad quad, added gml/graphml, changed default to quad --- cmd/cayley/cayley.go | 2 +- internal/dump.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/cayley/cayley.go b/cmd/cayley/cayley.go index 4411d53..f0512df 100644 --- a/cmd/cayley/cayley.go +++ b/cmd/cayley/cayley.go @@ -50,7 +50,7 @@ var ( databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") databaseBackend = flag.String("db", "memstore", "Database Backend.") dumpFile = flag.String("dump", "dbdump.nq", `Quad file to dump the database to (".gz" supported, "-" for stdout).`) - dumpType = flag.String("dump_type", "json", `Quad file format ("json", "nquad").`) + dumpType = flag.String("dump_type", "quad", `Quad file format ("json", "quad", "gml", "graphml").`) replicationBackend = flag.String("replication", "single", "Replication method.") host = flag.String("host", "127.0.0.1", "Host to listen on (defaults to all).") loadSize = flag.Int("load_size", 10000, "Size of quadsets to load") diff --git a/internal/dump.go b/internal/dump.go index 2bdc799..0b5d5fe 100644 --- a/internal/dump.go +++ b/internal/dump.go @@ -37,7 +37,7 @@ func Dump(qs graph.QuadStore, outFile, typ string) error { //TODO: add possible support for exporting specific queries only switch typ { - case "nquad": + case "quad": export.ExportNquad() case "json": export.ExportJson() From b5f0d3688b75ecfff257ab7bd62326148c391c82 Mon Sep 17 00:00:00 2001 From: David Schor Date: Tue, 11 Aug 2015 04:17:03 -0400 Subject: [PATCH 09/11] switched out QuadsAllIterator to allow any iterator --- exporter/exporter.go | 33 +++++++++++++++++++-------------- internal/dump.go | 2 +- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/exporter/exporter.go b/exporter/exporter.go index 10fef10..d5e24b9 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -11,12 +11,17 @@ import ( type Exporter struct { wr io.Writer qstore graph.QuadStore + qi graph.Iterator err error count int32 } func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter { - return &Exporter{wr: writer, qstore: qstore} + return NewExporterForIterator(writer, qstore, qstore.QuadsAllIterator()) +} + +func NewExporterForIterator(writer io.Writer, qstore graph.QuadStore, qi graph.Iterator) *Exporter { + return &Exporter{wr: writer, qstore: qstore, qi: qi} } // number of records @@ -24,9 +29,9 @@ func (exp *Exporter) Count() int32 { return exp.count } -func (exp *Exporter) ExportNquad() { - it := exp.qstore.QuadsAllIterator() - for graph.Next(it) { +func (exp *Exporter) ExportQuad() { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { exp.count++ quad := exp.qstore.Quad(it.Result()) @@ -46,8 +51,8 @@ func (exp *Exporter) ExportNquad() { func (exp *Exporter) ExportJson() { var jstr []byte exp.Write("[") - it := exp.qstore.QuadsAllIterator() - for graph.Next(it) { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { exp.count++ if exp.count > 1 { exp.Write(",") @@ -70,8 +75,8 @@ func (exp *Exporter) ExportGml() { exp.Write("Creator Cayley\ngraph\n[\n") seen = make(map[string]int32) - it := exp.qstore.QuadsAllIterator() - for graph.Next(it) { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { cur := exp.qstore.Quad(it.Result()) if _, ok := seen[cur.Subject]; !ok { exp.Write(" node\n [\n id ") @@ -94,8 +99,8 @@ func (exp *Exporter) ExportGml() { exp.count++ } - it.Reset() - for graph.Next(it) { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { cur := exp.qstore.Quad(it.Result()) exp.Write(" edge\n [\n source ") exp.Write(strconv.FormatInt(int64(seen[cur.Subject]), 10)) @@ -120,8 +125,8 @@ func (exp *Exporter) ExportGraphml() { exp.Write(" \n") seen = make(map[string]bool) - it := exp.qstore.QuadsAllIterator() - for graph.Next(it) { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { cur := exp.qstore.Quad(it.Result()) if found := seen[cur.Subject]; !found { seen[cur.Subject] = true @@ -138,8 +143,8 @@ func (exp *Exporter) ExportGraphml() { exp.count++ } - it.Reset() - for graph.Next(it) { + exp.qi.Reset() + for it := exp.qi; graph.Next(it); { cur := exp.qstore.Quad(it.Result()) exp.Write(" Date: Tue, 11 Aug 2015 04:23:18 -0400 Subject: [PATCH 10/11] changed count to just int --- exporter/exporter.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/exporter.go b/exporter/exporter.go index d5e24b9..283e4ab 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -13,7 +13,7 @@ type Exporter struct { qstore graph.QuadStore qi graph.Iterator err error - count int32 + count int } func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter { @@ -25,7 +25,7 @@ func NewExporterForIterator(writer io.Writer, qstore graph.QuadStore, qi graph.I } // number of records -func (exp *Exporter) Count() int32 { +func (exp *Exporter) Count() int { return exp.count } From f9ee0e77fcba86a2f9ed7d9203e4d18d5330dedc Mon Sep 17 00:00:00 2001 From: David Schor Date: Sat, 15 Aug 2015 06:58:42 -0400 Subject: [PATCH 11/11] formatted using gofmt --- exporter/exporter.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/exporter/exporter.go b/exporter/exporter.go index 283e4ab..39fdb4f 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -1,19 +1,19 @@ package exporter import ( - "io" "encoding/json" + "io" "strconv" "github.com/google/cayley/graph" ) type Exporter struct { - wr io.Writer + wr io.Writer qstore graph.QuadStore - qi graph.Iterator - err error - count int + qi graph.Iterator + err error + count int } func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter { @@ -34,7 +34,7 @@ func (exp *Exporter) ExportQuad() { for it := exp.qi; graph.Next(it); { exp.count++ quad := exp.qstore.Quad(it.Result()) - + exp.WriteEscString(quad.Subject) exp.Write(" ") exp.WriteEscString(quad.Predicate) @@ -45,11 +45,11 @@ func (exp *Exporter) ExportQuad() { exp.WriteEscString(quad.Label) } exp.Write(" .\n") - } + } } func (exp *Exporter) ExportJson() { - var jstr []byte + var jstr []byte exp.Write("[") exp.qi.Reset() for it := exp.qi; graph.Next(it); { @@ -149,28 +149,28 @@ func (exp *Exporter) ExportGraphml() { exp.Write(" \n") exp.Write(" ") exp.Write(cur.Predicate) exp.Write("\n \n") exp.count++ } - exp.Write(" \n\n"); + exp.Write(" \n\n") } //print out the string quoted, escaped func (exp *Exporter) WriteEscString(str string) { var esc []byte - + if exp.err != nil { return - } + } esc, exp.err = json.Marshal(str) if exp.err != nil { return - } - _, exp.err = exp.wr.Write(esc) + } + _, exp.err = exp.wr.Write(esc) } func (exp *Exporter) Write(str string) {