Merge pull request #292 from ds--/master

Adds exporting capabilities
This commit is contained in:
Barak Michener 2015-10-05 17:26:10 -04:00
commit 0274e9f73c
3 changed files with 268 additions and 0 deletions

View file

@ -51,6 +51,8 @@ var (
configFile = flag.String("config", "", "Path to an explicit configuration file.")
databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
databaseBackend = flag.String("db", "memstore", "Database Backend.")
dumpFile = flag.String("dump", "dbdump.nq", `Quad file to dump the database to (".gz" supported, "-" for stdout).`)
dumpType = flag.String("dump_type", "quad", `Quad file format ("json", "quad", "gml", "graphml").`)
replicationBackend = flag.String("replication", "single", "Replication method.")
host = flag.String("host", "127.0.0.1", "Host to listen on (defaults to all).")
loadSize = flag.Int("load_size", 10000, "Size of quadsets to load")
@ -74,6 +76,7 @@ Commands:
init Create an empty database.
load Bulk-load a quad file into the database.
http Serve an HTTP endpoint on the given host and port.
dump Bulk-dump the database into a quad file.
repl Drop into a REPL of the given query language.
version Version information.
@ -215,6 +218,25 @@ func main() {
handle.Close()
case "dump":
handle, err = db.Open(cfg)
if err != nil {
break
}
if !graph.IsPersistent(cfg.DatabaseType) {
err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType)
if err != nil {
break
}
}
err = internal.Dump(handle.QuadStore, *dumpFile, *dumpType)
if err != nil {
break
}
handle.Close()
case "repl":
handle, err = db.Open(cfg)
if err != nil {

185
exporter/exporter.go Normal file
View file

@ -0,0 +1,185 @@
package exporter
import (
	"encoding/json"
	"encoding/xml"
	"io"
	"strconv"

	"github.com/google/cayley/graph"
)
// Exporter writes the quads yielded by an iterator over a quad store to an
// io.Writer. Errors raised while writing are stored on the Exporter and
// reported by Err rather than returned from each call.
type Exporter struct {
	wr     io.Writer       // destination of the exported text
	qstore graph.QuadStore // turns iterator results into quads
	qi     graph.Iterator  // iterator walked by the Export* methods
	err    error           // error recorded by the write helpers, if any
	count  int             // running record count, see Count
}
// NewExporter returns an Exporter that writes to writer and iterates over
// the iterator returned by qstore.QuadsAllIterator().
func NewExporter(writer io.Writer, qstore graph.QuadStore) *Exporter {
	all := qstore.QuadsAllIterator()
	return NewExporterForIterator(writer, qstore, all)
}
// NewExporterForIterator returns an Exporter that writes to writer the quads
// produced by qi, using qstore to turn each iterator result into a quad.
func NewExporterForIterator(writer io.Writer, qstore graph.QuadStore, qi graph.Iterator) *Exporter {
	exp := new(Exporter)
	exp.wr = writer
	exp.qstore = qstore
	exp.qi = qi
	return exp
}
// Count reports the running total of records tallied by the Export* methods
// called on this Exporter so far.
func (exp *Exporter) Count() int {
	total := exp.count
	return total
}
// ExportQuad resets the iterator and writes one line per quad: subject,
// predicate, object and, when it is non-empty, the label. Each term is
// quoted by WriteEscString, terms are separated by a single space and the
// line ends with " .\n". Every quad visited increments the record count.
func (exp *Exporter) ExportQuad() {
	iter := exp.qi
	iter.Reset()
	for graph.Next(iter) {
		exp.count++
		q := exp.qstore.Quad(iter.Result())

		terms := [...]string{q.Subject, q.Predicate, q.Object}
		for i, term := range terms {
			if i > 0 {
				exp.Write(" ")
			}
			exp.WriteEscString(term)
		}

		// The label is optional and is left out entirely when empty.
		if q.Label != "" {
			exp.Write(" ")
			exp.WriteEscString(q.Label)
		}
		exp.Write(" .\n")
	}
}
// ExportJson resets the iterator and writes its quads as one JSON array,
// each element being the json.Marshal encoding of a quad, followed by a
// newline. Every quad visited increments the record count.
//
// The export stops at the first marshalling or write error, which is then
// reported by Err. After a marshalling error the closing bracket is not
// written.
func (exp *Exporter) ExportJson() {
	exp.Write("[")
	exp.qi.Reset()

	// Track the first element locally: exp.count is shared with earlier
	// exports and cannot be relied on to decide where separators go.
	first := true
	for it := exp.qi; exp.err == nil && graph.Next(it); {
		exp.count++

		// Keep the marshal result in a local so that a successful Marshal
		// can never overwrite a previously recorded write error with nil.
		jstr, err := json.Marshal(exp.qstore.Quad(it.Result()))
		if err != nil {
			exp.err = err
			return
		}

		if !first {
			exp.Write(",")
		}
		first = false

		// Write the encoded bytes directly; going through Write would
		// convert them to a string and back.
		if exp.err == nil {
			_, exp.err = exp.wr.Write(jstr)
		}
	}
	exp.Write("]\n")
}
// ExportGml writes the quads from the iterator as a GML graph. Experimental.
//
// The iterator is walked twice. The first pass emits a node record for every
// distinct subject and object and assigns it a sequential id; the second
// pass emits one edge record per quad, labelled with the predicate. The
// record count is incremented once per quad (in the edge pass), so Count
// is comparable with ExportQuad and ExportJson.
//
// Iteration stops as soon as a write error has been recorded; the error is
// reported by Err.
func (exp *Exporter) ExportGml() {
	seen := make(map[string]int32) // todo eliminate this for large dbs
	var id int32

	// writeNode emits a node record the first time name is encountered and
	// remembers the id assigned to it for the edge pass.
	writeNode := func(name string) {
		if _, ok := seen[name]; ok {
			return
		}
		seen[name] = id
		exp.Write(" node\n [\n id ")
		exp.Write(strconv.FormatInt(int64(id), 10))
		exp.Write("\n label ")
		exp.WriteEscString(name)
		exp.Write("\n ]\n")
		id++
	}

	exp.Write("Creator Cayley\ngraph\n[\n")

	// Pass 1: nodes.
	exp.qi.Reset()
	for it := exp.qi; exp.err == nil && graph.Next(it); {
		cur := exp.qstore.Quad(it.Result())
		writeNode(cur.Subject)
		writeNode(cur.Object)
	}

	// Pass 2: edges, referring to the ids assigned above.
	exp.qi.Reset()
	for it := exp.qi; exp.err == nil && graph.Next(it); {
		cur := exp.qstore.Quad(it.Result())
		exp.Write(" edge\n [\n source ")
		exp.Write(strconv.FormatInt(int64(seen[cur.Subject]), 10))
		exp.Write("\n target ")
		exp.Write(strconv.FormatInt(int64(seen[cur.Object]), 10))
		exp.Write("\n label ")
		exp.WriteEscString(cur.Predicate)
		exp.Write("\n ]\n")
		exp.count++
	}
	exp.Write("]\n")
}
// ExportGraphml writes the quads from the iterator as a GraphML document.
// Experimental.
//
// The iterator is walked twice. The first pass emits a <node> element for
// every distinct subject and object; the second pass emits one <edge>
// element per quad with the predicate in a <data key="predicate"> child.
// Node ids, edge endpoints and the predicate text are XML-escaped so that
// values containing characters such as <, & or " still yield well-formed
// XML. The record count is incremented once per quad (in the edge pass).
//
// Iteration stops as soon as a write error has been recorded; the error is
// reported by Err.
func (exp *Exporter) ExportGraphml() {
	seen := make(map[string]bool) // eliminate this for large databases

	// writeXML writes str with XML special characters escaped, honouring
	// the same sticky-error rule as Write.
	writeXML := func(str string) {
		if exp.err != nil {
			return
		}
		exp.err = xml.EscapeText(exp.wr, []byte(str))
	}

	// writeNode emits a <node> element the first time name is encountered.
	writeNode := func(name string) {
		if seen[name] {
			return
		}
		seen[name] = true
		exp.Write(" <node id=\"")
		writeXML(name)
		exp.Write("\" />\n")
	}

	exp.Write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
	exp.Write("<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n")
	exp.Write(" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n")
	exp.Write(" xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\">\n")
	exp.Write(" <graph id=\"Caylay\" edgedefault=\"directed\">\n")

	// Pass 1: nodes.
	exp.qi.Reset()
	for it := exp.qi; exp.err == nil && graph.Next(it); {
		cur := exp.qstore.Quad(it.Result())
		writeNode(cur.Subject)
		writeNode(cur.Object)
	}

	// Pass 2: edges.
	exp.qi.Reset()
	for it := exp.qi; exp.err == nil && graph.Next(it); {
		cur := exp.qstore.Quad(it.Result())
		exp.Write(" <edge source=\"")
		writeXML(cur.Subject)
		exp.Write("\" target=\"")
		writeXML(cur.Object)
		exp.Write("\">\n")
		exp.Write(" <data key=\"predicate\">")
		writeXML(cur.Predicate)
		exp.Write("</data>\n </edge>\n")
		exp.count++
	}
	exp.Write(" </graph>\n</graphml>\n")
}
// WriteEscString writes str as a double-quoted, escaped string, using the
// encoding produced by json.Marshal. It does nothing when an error has
// already been recorded; a marshalling or write failure is stored for Err.
func (exp *Exporter) WriteEscString(str string) {
	if exp.err != nil {
		return
	}

	quoted, err := json.Marshal(str)
	if err != nil {
		exp.err = err
		return
	}
	_, exp.err = exp.wr.Write(quoted)
}
// Write sends str to the underlying writer. Once an error has been recorded
// it does nothing; a failed write is stored and reported by Err.
func (exp *Exporter) Write(str string) {
	if exp.err == nil {
		_, exp.err = exp.wr.Write([]byte(str))
	}
}
// Err returns the error recorded by the write helpers, or nil when none has
// been recorded.
func (exp *Exporter) Err() error {
	recorded := exp.err
	return recorded
}

61
internal/dump.go Normal file
View file

@ -0,0 +1,61 @@
package internal
import (
"fmt"
"os"
"compress/gzip"
"path/filepath"
"github.com/google/cayley/graph"
"github.com/google/cayley/exporter"
)
// Dump writes the content of qs to outFile in the format named by typ.
//
// outFile is either a path, which is created (truncating any existing
// file), or "-" for standard output. The output is gzip-compressed when
// outFile has the extension ".gz". typ is one of "quad", "json", "gml" or
// "graphml"; the last two are experimental.
//
// Dump returns an error when typ is unknown, when the file cannot be
// created, when the export itself fails, or when the gzip stream or the
// file cannot be closed cleanly (a failed close means buffered data may not
// have reached the file).
func Dump(qs graph.QuadStore, outFile, typ string) (err error) {
	// Resolve the format first so that an unknown typ is rejected before
	// outFile is created or truncated.
	var run func(*exporter.Exporter)
	switch typ {
	case "quad":
		run = (*exporter.Exporter).ExportQuad
	case "json":
		run = (*exporter.Exporter).ExportJson
	// gml/graphml experimental
	case "gml":
		run = (*exporter.Exporter).ExportGml
	case "graphml":
		run = (*exporter.Exporter).ExportGraphml
	default:
		return fmt.Errorf("unknown format %q", typ)
	}

	var f *os.File
	if outFile == "-" {
		f = os.Stdout
	} else {
		f, err = os.Create(outFile)
		if err != nil {
			return fmt.Errorf("could not open file %q: %v", outFile, err)
		}
		// Runs after the gzip close below (defers are LIFO). The close
		// error is surfaced unless an earlier error is already returned.
		defer func() {
			if cerr := f.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("could not close file %q: %v", outFile, cerr)
			}
		}()
		fmt.Printf("dumping db to file %q\n", outFile)
	}

	var export *exporter.Exporter
	if filepath.Ext(outFile) == ".gz" {
		gz := gzip.NewWriter(f)
		// Close flushes the remaining compressed data, so its error
		// must not be dropped.
		defer func() {
			if cerr := gz.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("could not finish gzip stream for %q: %v", outFile, cerr)
			}
		}()
		export = exporter.NewExporter(gz, qs)
	} else {
		export = exporter.NewExporter(f, qs)
	}

	//TODO: add possible support for exporting specific queries only
	run(export)
	if err = export.Err(); err != nil {
		return err
	}

	// Keep stdout clean when the dump itself is written there.
	if outFile != "-" {
		fmt.Printf("%d entries were written\n", export.Count())
	}
	return nil
}