diff --git a/cmd/cayley/cayley.go b/cmd/cayley/cayley.go index 7f76559..edf0a66 100644 --- a/cmd/cayley/cayley.go +++ b/cmd/cayley/cayley.go @@ -51,6 +51,8 @@ var ( configFile = flag.String("config", "", "Path to an explicit configuration file.") databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") databaseBackend = flag.String("db", "memstore", "Database Backend.") + dumpFile = flag.String("dump", "dbdump.nq", `Quad file to dump the database to (".gz" supported, "-" for stdout).`) + dumpType = flag.String("dump_type", "quad", `Quad file format ("json", "quad", "gml", "graphml").`) replicationBackend = flag.String("replication", "single", "Replication method.") host = flag.String("host", "127.0.0.1", "Host to listen on (defaults to all).") loadSize = flag.Int("load_size", 10000, "Size of quadsets to load") @@ -74,6 +76,7 @@ Commands: init Create an empty database. load Bulk-load a quad file into the database. http Serve an HTTP endpoint on the given host and port. + dump Bulk-dump the database into a quad file. repl Drop into a REPL of the given query language. version Version information. 
@@ -215,6 +218,25 @@ func main() {
 
 		handle.Close()
 
+	case "dump":
+		handle, err = db.Open(cfg)
+		if err != nil {
+			break
+		}
+		if !graph.IsPersistent(cfg.DatabaseType) {
+			err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType)
+			if err != nil {
+				break
+			}
+		}
+
+		err = internal.Dump(handle.QuadStore, *dumpFile, *dumpType)
+		if err != nil {
+			break
+		}
+
+		handle.Close()
+
 	case "repl":
 		handle, err = db.Open(cfg)
 		if err != nil {
diff --git a/exporter/exporter.go b/exporter/exporter.go
new file mode 100644
index 0000000..39fdb4f
--- /dev/null
+++ b/exporter/exporter.go
@@ -0,0 +1,241 @@
+// Package exporter writes the quads of a graph.QuadStore to an io.Writer in
+// one of several text formats: quad, JSON, GML and GraphML.
+package exporter
+
+import (
+	"encoding/json"
+	"encoding/xml"
+	"io"
+	"strconv"
+
+	"github.com/google/cayley/graph"
+)
+
+// graphmlHeader opens a GraphML document holding a single directed graph and
+// declares the "predicate" data key attached to every edge.
+const graphmlHeader = `<?xml version="1.0" encoding="UTF-8"?>
+<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
+  <key id="predicate" for="edge" attr.name="predicate" attr.type="string"/>
+  <graph id="cayley" edgedefault="directed">
+`
+
+// graphmlFooter closes the elements opened by graphmlHeader.
+const graphmlFooter = "  </graph>\n</graphml>\n"
+
+// Exporter streams quads from an iterator to a writer. The first error it
+// hits is remembered, turns every later write into a no-op and is reported
+// by Err, so callers check for failure once, after exporting.
+type Exporter struct {
+	wr     io.Writer
+	qstore graph.QuadStore
+	qi     graph.Iterator
+	err    error
+	count  int
+}
+
+// NewExporter returns an Exporter that writes every quad of qstore to writer.
+func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter {
+	return NewExporterForIterator(writer, qstore, qstore.QuadsAllIterator())
+}
+
+// NewExporterForIterator returns an Exporter that writes the quads yielded by
+// qi, looked up in qstore, to writer.
+func NewExporterForIterator(writer io.Writer, qstore graph.QuadStore, qi graph.Iterator) *Exporter {
+	return &Exporter{wr: writer, qstore: qstore, qi: qi}
+}
+
+// Count returns the number of quads exported so far.
+func (exp *Exporter) Count() int {
+	return exp.count
+}
+
+// more advances the iterator and reports whether it produced a result. It
+// gives up once a write has failed, so a broken output does not cost a scan
+// of the rest of the store.
+func (exp *Exporter) more() bool {
+	return exp.err == nil && graph.Next(exp.qi)
+}
+
+// ExportQuad writes one line per quad: the quoted subject, predicate, object
+// and, when it is not empty, label, separated by spaces and ended by " .".
+func (exp *Exporter) ExportQuad() {
+	exp.qi.Reset()
+	for exp.more() {
+		exp.count++
+		quad := exp.qstore.Quad(exp.qi.Result())
+
+		exp.WriteEscString(quad.Subject)
+		exp.Write(" ")
+		exp.WriteEscString(quad.Predicate)
+		exp.Write(" ")
+		exp.WriteEscString(quad.Object)
+		if quad.Label != "" {
+			exp.Write(" ")
+			exp.WriteEscString(quad.Label)
+		}
+		exp.Write(" .\n")
+	}
+}
+
+// ExportJson writes all quads as one JSON array followed by a newline.
+func (exp *Exporter) ExportJson() {
+	exp.Write("[")
+	exp.qi.Reset()
+	// Track the first element locally instead of testing exp.count, which is
+	// not zero when the Exporter has already been used for another export.
+	for first := true; exp.more(); first = false {
+		exp.count++
+		if !first {
+			exp.Write(",")
+		}
+
+		// Keep the marshal result out of exp.err: assigning it directly
+		// would replace an earlier write error with nil.
+		jstr, err := json.Marshal(exp.qstore.Quad(exp.qi.Result()))
+		if err != nil {
+			if exp.err == nil {
+				exp.err = err
+			}
+			return
+		}
+		exp.writeBytes(jstr)
+	}
+	exp.Write("]\n")
+}
+
+// ExportGml writes the graph in GML: one node per distinct subject or object,
+// then one edge per quad labelled with its predicate. Experimental.
+func (exp *Exporter) ExportGml() {
+	// TODO: eliminate this map for large databases.
+	seen := make(map[string]int32)
+
+	exp.Write("Creator Cayley\ngraph\n[\n")
+
+	exp.qi.Reset()
+	for exp.more() {
+		cur := exp.qstore.Quad(exp.qi.Result())
+		exp.gmlNode(seen, cur.Subject)
+		exp.gmlNode(seen, cur.Object)
+	}
+
+	// Quads are counted in this pass only; counting them in the node pass as
+	// well would make Count report twice the number of quads.
+	exp.qi.Reset()
+	for exp.more() {
+		cur := exp.qstore.Quad(exp.qi.Result())
+		exp.Write(" edge\n [\n source ")
+		exp.Write(strconv.FormatInt(int64(seen[cur.Subject]), 10))
+		exp.Write("\n target ")
+		exp.Write(strconv.FormatInt(int64(seen[cur.Object]), 10))
+		exp.Write("\n label ")
+		exp.WriteEscString(cur.Predicate)
+		exp.Write("\n ]\n")
+		exp.count++
+	}
+	exp.Write("]\n")
+}
+
+// gmlNode writes a GML node for name unless seen already holds it, and
+// records the id it assigned. Ids are handed out sequentially from zero.
+func (exp *Exporter) gmlNode(seen map[string]int32, name string) {
+	if _, ok := seen[name]; ok {
+		return
+	}
+	id := int32(len(seen))
+	seen[name] = id
+
+	exp.Write(" node\n [\n id ")
+	exp.Write(strconv.FormatInt(int64(id), 10))
+	exp.Write("\n label ")
+	exp.WriteEscString(name)
+	exp.Write("\n ]\n")
+}
+
+// ExportGraphml writes the graph as a GraphML document: one node element per
+// distinct subject or object, then one edge element per quad carrying its
+// predicate as data. Experimental.
+func (exp *Exporter) ExportGraphml() {
+	// TODO: eliminate this map for large databases.
+	seen := make(map[string]bool)
+
+	exp.Write(graphmlHeader)
+
+	exp.qi.Reset()
+	for exp.more() {
+		cur := exp.qstore.Quad(exp.qi.Result())
+		exp.graphmlNode(seen, cur.Subject)
+		exp.graphmlNode(seen, cur.Object)
+	}
+
+	// Quads are counted in this pass only; counting them in the node pass as
+	// well would make Count report twice the number of quads.
+	exp.qi.Reset()
+	for exp.more() {
+		cur := exp.qstore.Quad(exp.qi.Result())
+		exp.Write(`    <edge source="`)
+		exp.writeXMLEscaped(cur.Subject)
+		exp.Write(`" target="`)
+		exp.writeXMLEscaped(cur.Object)
+		exp.Write("\">\n      <data key=\"predicate\">")
+		exp.writeXMLEscaped(cur.Predicate)
+		exp.Write("</data>\n    </edge>\n")
+		exp.count++
+	}
+	exp.Write(graphmlFooter)
+}
+
+// graphmlNode writes a GraphML node element for name unless seen already
+// holds it.
+func (exp *Exporter) graphmlNode(seen map[string]bool, name string) {
+	if seen[name] {
+		return
+	}
+	seen[name] = true
+
+	exp.Write(`    <node id="`)
+	exp.writeXMLEscaped(name)
+	exp.Write("\"/>\n")
+}
+
+// WriteEscString writes str as a double-quoted, JSON-escaped string.
+func (exp *Exporter) WriteEscString(str string) {
+	if exp.err != nil {
+		return
+	}
+	esc, err := json.Marshal(str)
+	if err != nil {
+		exp.err = err
+		return
+	}
+	exp.writeBytes(esc)
+}
+
+// Write writes str verbatim.
+func (exp *Exporter) Write(str string) {
+	if exp.err != nil {
+		return
+	}
+	_, exp.err = io.WriteString(exp.wr, str)
+}
+
+// writeBytes writes b verbatim.
+func (exp *Exporter) writeBytes(b []byte) {
+	if exp.err != nil {
+		return
+	}
+	_, exp.err = exp.wr.Write(b)
+}
+
+// writeXMLEscaped writes str with the characters that are special in XML
+// text and attribute values escaped.
+func (exp *Exporter) writeXMLEscaped(str string) {
+	if exp.err != nil {
+		return
+	}
+	exp.err = xml.EscapeText(exp.wr, []byte(str))
+}
+
+// Err returns the first error encountered while exporting, or nil.
+func (exp *Exporter) Err() error {
+	return exp.err
+}
diff --git a/internal/dump.go b/internal/dump.go
new file mode 100644
index 0000000..e8a0f9a
--- /dev/null
+++ b/internal/dump.go
@@ -0,0 +1,84 @@
+package internal
+
+import (
+	"compress/gzip"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/google/cayley/exporter"
+	"github.com/google/cayley/graph"
+)
+
+// Dump writes every quad of qs to outFile in the format named by typ: "quad",
+// "json", "gml" or "graphml" (the last two are experimental). An outFile of
+// "-" means standard output; a name ending in ".gz" is gzip-compressed.
+func Dump(qs graph.QuadStore, outFile, typ string) error {
+	// Reject a bad format before os.Create truncates an existing file.
+	switch typ {
+	case "quad", "json", "gml", "graphml":
+	default:
+		return fmt.Errorf("unknown format %q", typ)
+	}
+
+	if outFile == "-" {
+		_, err := writeDump(os.Stdout, qs, outFile, typ)
+		return err
+	}
+
+	f, err := os.Create(outFile)
+	if err != nil {
+		return fmt.Errorf("could not open file %q: %v", outFile, err)
+	}
+	fmt.Printf("dumping db to file %q\n", outFile)
+
+	count, err := writeDump(f, qs, outFile, typ)
+	// Close explicitly rather than in a defer: a failed close can mean lost
+	// data and has to reach the caller.
+	if cerr := f.Close(); err == nil && cerr != nil {
+		err = fmt.Errorf("could not close file %q: %v", outFile, cerr)
+	}
+	if err != nil {
+		return err
+	}
+
+	fmt.Printf("%d entries were written\n", count)
+	return nil
+}
+
+// writeDump writes the quads of qs to w in format typ, gzip-compressing them
+// when outFile ends in ".gz", and returns the number of quads written.
+func writeDump(w io.Writer, qs graph.QuadStore, outFile, typ string) (int, error) {
+	var zw *gzip.Writer
+	if filepath.Ext(outFile) == ".gz" {
+		zw = gzip.NewWriter(w)
+		w = zw
+	}
+
+	//TODO: add possible support for exporting specific queries only
+	exp := exporter.NewExporter(w, qs)
+	switch typ {
+	case "quad":
+		exp.ExportQuad()
+	case "json":
+		exp.ExportJson()
+	// gml/graphml experimental
+	case "gml":
+		exp.ExportGml()
+	case "graphml":
+		exp.ExportGraphml()
+	default:
+		return 0, fmt.Errorf("unknown format %q", typ)
+	}
+
+	err := exp.Err()
+	if zw != nil {
+		// Close flushes the compressed tail; without it the archive is
+		// truncated.
+		if cerr := zw.Close(); err == nil {
+			err = cerr
+		}
+	}
+	return exp.Count(), err
+}