Use error returns and interface type for parsing
Fixes issue #72 This change simplifies interactions with parsing N-Quads and makes reading datasets more robust. Changes made while here also improve performance: benchmark old ns/op new ns/op delta BenchmarkParser 1058 667 -36.96% We still use string concatenation which I'm not wildly happy about, but I think this can be left for a later change. Initial changes towards idiomatic error handling have been made. More significant changes are needed, but these have subtle design implication and need to be thought about more. 30kmoviesdata.nt.gz has been altered to properly escape double quotes. This was done mechanically and with manual curation to pick up straglers.
This commit is contained in:
parent
abdd649c82
commit
0e0e382d2b
11 changed files with 260 additions and 226 deletions
Binary file not shown.
|
|
@ -15,14 +15,12 @@
|
||||||
package cayleyappengine
|
package cayleyappengine
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/barakmich/glog"
|
"github.com/barakmich/glog"
|
||||||
|
|
||||||
"github.com/google/cayley/config"
|
"github.com/google/cayley/config"
|
||||||
|
"github.com/google/cayley/db"
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
"github.com/google/cayley/http"
|
"github.com/google/cayley/http"
|
||||||
"github.com/google/cayley/nquads"
|
|
||||||
|
|
||||||
_ "github.com/google/cayley/graph/memstore"
|
_ "github.com/google/cayley/graph/memstore"
|
||||||
)
|
)
|
||||||
|
|
@ -32,37 +30,6 @@ func init() {
|
||||||
cfg := config.ParseConfigFromFile("cayley_appengine.cfg")
|
cfg := config.ParseConfigFromFile("cayley_appengine.cfg")
|
||||||
ts, _ := graph.NewTripleStore("memstore", "", nil)
|
ts, _ := graph.NewTripleStore("memstore", "", nil)
|
||||||
glog.Errorln(cfg)
|
glog.Errorln(cfg)
|
||||||
LoadTriplesFromFileInto(ts, cfg.DatabasePath, cfg.LoadSize)
|
db.Load(ts, cfg, cfg.DatabasePath)
|
||||||
http.SetupRoutes(ts, cfg)
|
http.SetupRoutes(ts, cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) {
|
|
||||||
f, err := os.Open(tripleFile)
|
|
||||||
if err != nil {
|
|
||||||
glog.Fatalln("Couldn't open file", tripleFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
if err := f.Close(); err != nil {
|
|
||||||
glog.Fatalln(err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
nquads.ReadNQuadsFromReader(c, f)
|
|
||||||
}
|
|
||||||
|
|
||||||
func LoadTriplesFromFileInto(ts graph.TripleStore, filename string, loadSize int) {
|
|
||||||
tChan := make(chan *graph.Triple)
|
|
||||||
go ReadTriplesFromFile(tChan, filename)
|
|
||||||
tripleblock := make([]*graph.Triple, loadSize)
|
|
||||||
i := 0
|
|
||||||
for t := range tChan {
|
|
||||||
tripleblock[i] = t
|
|
||||||
i++
|
|
||||||
if i == loadSize {
|
|
||||||
ts.AddTripleSet(tripleblock)
|
|
||||||
i = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ts.AddTripleSet(tripleblock[0:i])
|
|
||||||
}
|
|
||||||
|
|
|
||||||
39
cayley.go
39
cayley.go
|
|
@ -58,37 +58,62 @@ func main() {
|
||||||
Usage()
|
Usage()
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd := os.Args[1]
|
cmd := os.Args[1]
|
||||||
newargs := make([]string, 0)
|
newargs := make([]string, 0)
|
||||||
newargs = append(newargs, os.Args[0])
|
newargs = append(newargs, os.Args[0])
|
||||||
newargs = append(newargs, os.Args[2:]...)
|
newargs = append(newargs, os.Args[2:]...)
|
||||||
os.Args = newargs
|
os.Args = newargs
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
var ts graph.TripleStore
|
|
||||||
cfg := config.ParseConfigFromFlagsAndFile(*configFile)
|
cfg := config.ParseConfigFromFlagsAndFile(*configFile)
|
||||||
|
|
||||||
if os.Getenv("GOMAXPROCS") == "" {
|
if os.Getenv("GOMAXPROCS") == "" {
|
||||||
runtime.GOMAXPROCS(runtime.NumCPU())
|
runtime.GOMAXPROCS(runtime.NumCPU())
|
||||||
glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU())
|
glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU())
|
||||||
} else {
|
} else {
|
||||||
glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting")
|
glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
ts graph.TripleStore
|
||||||
|
err error
|
||||||
|
)
|
||||||
switch cmd {
|
switch cmd {
|
||||||
case "init":
|
case "init":
|
||||||
db.Init(cfg, *tripleFile)
|
err = db.Init(cfg, *tripleFile)
|
||||||
case "load":
|
case "load":
|
||||||
ts = db.Open(cfg)
|
ts, err = db.Open(cfg)
|
||||||
db.Load(ts, cfg, *tripleFile)
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = db.Load(ts, cfg, *tripleFile)
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
ts.Close()
|
ts.Close()
|
||||||
case "repl":
|
case "repl":
|
||||||
ts = db.Open(cfg)
|
ts, err = db.Open(cfg)
|
||||||
db.Repl(ts, *queryLanguage, cfg)
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = db.Repl(ts, *queryLanguage, cfg)
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
ts.Close()
|
ts.Close()
|
||||||
case "http":
|
case "http":
|
||||||
ts = db.Open(cfg)
|
ts, err = db.Open(cfg)
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
http.Serve(ts, cfg)
|
http.Serve(ts, cfg)
|
||||||
ts.Close()
|
ts.Close()
|
||||||
default:
|
default:
|
||||||
fmt.Println("No command", cmd)
|
fmt.Println("No command", cmd)
|
||||||
flag.Usage()
|
flag.Usage()
|
||||||
}
|
}
|
||||||
|
if err != nil {
|
||||||
|
glog.Fatalln(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
17
db/init.go
17
db/init.go
|
|
@ -19,16 +19,21 @@ import (
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Init(cfg *config.Config, triplePath string) bool {
|
func Init(cfg *config.Config, triplePath string) error {
|
||||||
err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if triplePath != "" {
|
if triplePath != "" {
|
||||||
ts := Open(cfg)
|
ts, err := Open(cfg)
|
||||||
Load(ts, cfg, triplePath)
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = Load(ts, cfg, triplePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ts.Close()
|
ts.Close()
|
||||||
}
|
}
|
||||||
return true
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
64
db/load.go
64
db/load.go
|
|
@ -15,58 +15,54 @@
|
||||||
package db
|
package db
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/barakmich/glog"
|
|
||||||
|
|
||||||
"github.com/google/cayley/config"
|
"github.com/google/cayley/config"
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
"github.com/google/cayley/nquads"
|
"github.com/google/cayley/nquads"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Load(ts graph.TripleStore, cfg *config.Config, triplePath string) {
|
func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
|
||||||
tChan := make(chan *graph.Triple)
|
f, err := os.Open(path)
|
||||||
go ReadTriplesFromFile(tChan, triplePath)
|
if err != nil {
|
||||||
|
return fmt.Errorf("could not open file %q: %v", path, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
dec := nquads.NewDecoder(f)
|
||||||
|
|
||||||
bulker, canBulk := ts.(graph.BulkLoader)
|
bulker, canBulk := ts.(graph.BulkLoader)
|
||||||
if canBulk {
|
if canBulk {
|
||||||
err := bulker.BulkLoad(tChan)
|
err = bulker.BulkLoad(dec)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
if err != graph.ErrCannotBulkLoad {
|
if err == graph.ErrCannotBulkLoad {
|
||||||
glog.Errorln("Error attempting to bulk load: ", err)
|
err = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LoadTriplesInto(tChan, ts, cfg.LoadSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) {
|
|
||||||
f, err := os.Open(tripleFile)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Fatalln("Couldn't open file", tripleFile)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
block := make([]*graph.Triple, 0, cfg.LoadSize)
|
||||||
if err := f.Close(); err != nil {
|
for {
|
||||||
glog.Fatalln(err)
|
t, err := dec.Unmarshal()
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
}()
|
block = append(block, t)
|
||||||
|
if len(block) == cap(block) {
|
||||||
nquads.ReadNQuadsFromReader(c, f)
|
ts.AddTripleSet(block)
|
||||||
}
|
block = block[:0]
|
||||||
|
|
||||||
func LoadTriplesInto(tChan chan *graph.Triple, ts graph.TripleStore, loadSize int) {
|
|
||||||
tripleblock := make([]*graph.Triple, loadSize)
|
|
||||||
i := 0
|
|
||||||
for t := range tChan {
|
|
||||||
tripleblock[i] = t
|
|
||||||
i++
|
|
||||||
if i == loadSize {
|
|
||||||
ts.AddTripleSet(tripleblock)
|
|
||||||
i = 0
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts.AddTripleSet(tripleblock[0:i])
|
ts.AddTripleSet(block)
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
11
db/open.go
11
db/open.go
|
|
@ -21,17 +21,20 @@ import (
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Open(cfg *config.Config) graph.TripleStore {
|
func Open(cfg *config.Config) (graph.TripleStore, error) {
|
||||||
glog.Infof("Opening database \"%s\" at %s", cfg.DatabaseType, cfg.DatabasePath)
|
glog.Infof("Opening database \"%s\" at %s", cfg.DatabaseType, cfg.DatabasePath)
|
||||||
ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Fatalln(err.Error())
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Memstore is not persistent, so it MUST be loaded.
|
// Memstore is not persistent, so it MUST be loaded.
|
||||||
if cfg.DatabaseType == "memstore" {
|
if cfg.DatabaseType == "memstore" {
|
||||||
Load(ts, cfg, cfg.DatabasePath)
|
err = Load(ts, cfg, cfg.DatabasePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ts
|
return ts, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
66
db/repl.go
66
db/repl.go
|
|
@ -16,10 +16,11 @@ package db
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/cayley/config"
|
"github.com/google/cayley/config"
|
||||||
|
|
@ -60,7 +61,7 @@ func Run(query string, ses graph.Session) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) {
|
func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error {
|
||||||
var ses graph.Session
|
var ses graph.Session
|
||||||
switch queryLanguage {
|
switch queryLanguage {
|
||||||
case "sexp":
|
case "sexp":
|
||||||
|
|
@ -72,72 +73,75 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) {
|
||||||
default:
|
default:
|
||||||
ses = gremlin.NewSession(ts, cfg.GremlinTimeout, true)
|
ses = gremlin.NewSession(ts, cfg.GremlinTimeout, true)
|
||||||
}
|
}
|
||||||
inputBf := bufio.NewReader(os.Stdin)
|
buf := bufio.NewReader(os.Stdin)
|
||||||
line := ""
|
var line []byte
|
||||||
for {
|
for {
|
||||||
if line == "" {
|
if len(line) == 0 {
|
||||||
fmt.Print("cayley> ")
|
fmt.Print("cayley> ")
|
||||||
} else {
|
} else {
|
||||||
fmt.Print("... ")
|
fmt.Print("... ")
|
||||||
}
|
}
|
||||||
l, pre, err := inputBf.ReadLine()
|
l, prefix, err := buf.ReadLine()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
if line != "" {
|
if len(line) != 0 {
|
||||||
line = ""
|
line = line[:0]
|
||||||
} else {
|
} else {
|
||||||
break
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
line = ""
|
line = line[:0]
|
||||||
}
|
}
|
||||||
if pre {
|
if prefix {
|
||||||
panic("Line too long")
|
return errors.New("line too long")
|
||||||
}
|
}
|
||||||
line += string(l)
|
line = append(line, l...)
|
||||||
if line == "" {
|
if len(line) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(line, ":debug") {
|
if bytes.HasPrefix(line, []byte(":debug")) {
|
||||||
ses.ToggleDebug()
|
ses.ToggleDebug()
|
||||||
fmt.Println("Debug Toggled")
|
fmt.Println("Debug Toggled")
|
||||||
line = ""
|
line = line[:0]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(line, ":a") {
|
if bytes.HasPrefix(line, []byte(":a")) {
|
||||||
var tripleStmt = line[3:]
|
var tripleStmt = line[3:]
|
||||||
triple := nquads.Parse(tripleStmt)
|
triple, err := nquads.Parse(string(tripleStmt))
|
||||||
if triple == nil {
|
if triple == nil {
|
||||||
fmt.Println("Not a valid triple.")
|
if err != nil {
|
||||||
line = ""
|
fmt.Printf("not a valid triple: %v\n", err)
|
||||||
|
}
|
||||||
|
line = line[:0]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ts.AddTriple(triple)
|
ts.AddTriple(triple)
|
||||||
line = ""
|
line = line[:0]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(line, ":d") {
|
if bytes.HasPrefix(line, []byte(":d")) {
|
||||||
var tripleStmt = line[3:]
|
var tripleStmt = line[3:]
|
||||||
triple := nquads.Parse(tripleStmt)
|
triple, err := nquads.Parse(string(tripleStmt))
|
||||||
if triple == nil {
|
if triple == nil {
|
||||||
fmt.Println("Not a valid triple.")
|
if err != nil {
|
||||||
line = ""
|
fmt.Printf("not a valid triple: %v\n", err)
|
||||||
|
}
|
||||||
|
line = line[:0]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ts.RemoveTriple(triple)
|
ts.RemoveTriple(triple)
|
||||||
line = ""
|
line = line[:0]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
result, err := ses.InputParses(line)
|
result, err := ses.InputParses(string(line))
|
||||||
switch result {
|
switch result {
|
||||||
case graph.Parsed:
|
case graph.Parsed:
|
||||||
Run(line, ses)
|
Run(string(line), ses)
|
||||||
line = ""
|
line = line[:0]
|
||||||
case graph.ParseFail:
|
case graph.ParseFail:
|
||||||
fmt.Println("Error: ", err)
|
fmt.Println("Error: ", err)
|
||||||
line = ""
|
line = line[:0]
|
||||||
case graph.ParseMore:
|
case graph.ParseMore:
|
||||||
default:
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -122,10 +122,14 @@ func (d Options) StringKey(key string) (string, bool) {
|
||||||
var ErrCannotBulkLoad = errors.New("triplestore: cannot bulk load")
|
var ErrCannotBulkLoad = errors.New("triplestore: cannot bulk load")
|
||||||
|
|
||||||
type BulkLoader interface {
|
type BulkLoader interface {
|
||||||
// BulkLoad loads Triples from a channel in bulk to the TripleStore. It
|
// BulkLoad loads Triples from a TripleUnmarshaler in bulk to the TripleStore.
|
||||||
// returns ErrCannotBulkLoad if bulk loading is not possible (i.e. if you
|
// It returns ErrCannotBulkLoad if bulk loading is not possible. For example if
|
||||||
// cannot load in bulk to a non-empty database, and the db is non-empty)
|
// you cannot load in bulk to a non-empty database, and the db is non-empty.
|
||||||
BulkLoad(chan *Triple) error
|
BulkLoad(TripleUnmarshaler) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type TripleUnmarshaler interface {
|
||||||
|
Unmarshal() (*Triple, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type NewStoreFunc func(string, Options) (TripleStore, error)
|
type NewStoreFunc func(string, Options) (TripleStore, error)
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ package http
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
@ -77,22 +78,32 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
|
||||||
blockSize = int64(api.config.LoadSize)
|
blockSize = int64(api.config.LoadSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
tChan := make(chan *graph.Triple)
|
dec := nquads.NewDecoder(formFile)
|
||||||
go nquads.ReadNQuadsFromReader(tChan, formFile)
|
|
||||||
tripleblock := make([]*graph.Triple, blockSize)
|
var (
|
||||||
nTriples := 0
|
n int
|
||||||
i := int64(0)
|
|
||||||
for t := range tChan {
|
block = make([]*graph.Triple, 0, blockSize)
|
||||||
tripleblock[i] = t
|
)
|
||||||
i++
|
for {
|
||||||
nTriples++
|
t, err := dec.Unmarshal()
|
||||||
if i == blockSize {
|
if err != nil {
|
||||||
api.ts.AddTripleSet(tripleblock)
|
if err == io.EOF {
|
||||||
i = 0
|
break
|
||||||
|
}
|
||||||
|
panic("what can do this here?") // FIXME(kortschak)
|
||||||
|
}
|
||||||
|
block = append(block, t)
|
||||||
|
n++
|
||||||
|
if len(block) == cap(block) {
|
||||||
|
api.ts.AddTripleSet(block)
|
||||||
|
block = block[:0]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
api.ts.AddTripleSet(tripleblock[0:i])
|
api.ts.AddTripleSet(block)
|
||||||
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", nTriples)
|
|
||||||
|
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n)
|
||||||
|
|
||||||
return 200
|
return 200
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
185
nquads/nquads.go
185
nquads/nquads.go
|
|
@ -16,109 +16,112 @@ package nquads
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/barakmich/glog"
|
|
||||||
|
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
)
|
)
|
||||||
|
|
||||||
func isWhitespace(s uint8) bool {
|
var (
|
||||||
return (s == '\t' || s == '\r' || s == ' ')
|
ErrAbsentSubject = errors.New("absent subject")
|
||||||
}
|
ErrAbsentPredicate = errors.New("absent predicate")
|
||||||
func Parse(str string) *graph.Triple {
|
ErrAbsentObject = errors.New("absent object")
|
||||||
|
ErrUnterminated = errors.New("unterminated quad")
|
||||||
|
)
|
||||||
|
|
||||||
|
func Parse(str string) (*graph.Triple, error) {
|
||||||
// Skip leading whitespace.
|
// Skip leading whitespace.
|
||||||
str = skipWhitespace(str)
|
str = trimSpace(str)
|
||||||
// Check for a comment
|
// Check for a comment
|
||||||
if str != "" && str[0] == '#' {
|
if str != "" && str[0] == '#' {
|
||||||
return nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
sub, remainder := getTripleComponent(str)
|
sub, remainder := getTripleComponent(str)
|
||||||
if sub == nil {
|
if sub == "" {
|
||||||
return nil
|
return nil, ErrAbsentSubject
|
||||||
}
|
}
|
||||||
str = skipWhitespace(remainder)
|
str = trimSpace(remainder)
|
||||||
pred, remainder := getTripleComponent(str)
|
pred, remainder := getTripleComponent(str)
|
||||||
if pred == nil {
|
if pred == "" {
|
||||||
return nil
|
return nil, ErrAbsentPredicate
|
||||||
}
|
}
|
||||||
str = skipWhitespace(remainder)
|
str = trimSpace(remainder)
|
||||||
obj, remainder := getTripleComponent(str)
|
obj, remainder := getTripleComponent(str)
|
||||||
if obj == nil {
|
if obj == "" {
|
||||||
return nil
|
return nil, ErrAbsentObject
|
||||||
}
|
}
|
||||||
str = skipWhitespace(remainder)
|
str = trimSpace(remainder)
|
||||||
prov_ptr, remainder := getTripleComponent(str)
|
prov, remainder := getTripleComponent(str)
|
||||||
var prov string
|
str = trimSpace(remainder)
|
||||||
if prov_ptr == nil {
|
|
||||||
prov = ""
|
|
||||||
} else {
|
|
||||||
prov = *prov_ptr
|
|
||||||
}
|
|
||||||
str = skipWhitespace(remainder)
|
|
||||||
if str != "" && str[0] == '.' {
|
if str != "" && str[0] == '.' {
|
||||||
return &graph.Triple{*sub, *pred, *obj, prov}
|
return &graph.Triple{sub, pred, obj, prov}, nil
|
||||||
}
|
}
|
||||||
return nil
|
return nil, ErrUnterminated
|
||||||
}
|
}
|
||||||
|
|
||||||
func skipWhitespace(str string) string {
|
func isSpace(s uint8) bool {
|
||||||
|
return s == ' ' || s == '\t' || s == '\r'
|
||||||
|
}
|
||||||
|
|
||||||
|
func trimSpace(str string) string {
|
||||||
i := 0
|
i := 0
|
||||||
for i < len(str) && isWhitespace(str[i]) {
|
for i < len(str) && isSpace(str[i]) {
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
return str[i:]
|
return str[i:]
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTripleComponent(str string) (*string, string) {
|
func getTripleComponent(str string) (head, tail string) {
|
||||||
if len(str) == 0 {
|
if len(str) == 0 {
|
||||||
return nil, str
|
return "", str
|
||||||
}
|
}
|
||||||
if str[0] == '<' {
|
if str[0] == '<' {
|
||||||
return getUriPart(str[1:])
|
return getUriPart(str[1:])
|
||||||
} else if str[0] == '"' {
|
} else if str[0] == '"' {
|
||||||
return getQuotedPart(str[1:])
|
return getQuotedPart(str[1:])
|
||||||
} else if str[0] == '.' {
|
} else if str[0] == '.' {
|
||||||
return nil, str
|
return "", str
|
||||||
} else {
|
} else {
|
||||||
// Technically not part of the spec. But we do it anyway for convenience.
|
// Technically not part of the spec. But we do it anyway for convenience.
|
||||||
return getUnquotedPart(str)
|
return getUnquotedPart(str)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getUriPart(str string) (*string, string) {
|
func getUriPart(str string) (head, tail string) {
|
||||||
i := 0
|
i := 0
|
||||||
for i < len(str) && str[i] != '>' {
|
for i < len(str) && str[i] != '>' {
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
if i == len(str) {
|
if i == len(str) {
|
||||||
return nil, str
|
return "", str
|
||||||
}
|
}
|
||||||
part := str[0:i]
|
head = str[0:i]
|
||||||
return &part, str[i+1:]
|
return head, str[i+1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
func getQuotedPart(str string) (*string, string) {
|
func getQuotedPart(str string) (head, tail string) {
|
||||||
i := 0
|
var (
|
||||||
start := 0
|
i int
|
||||||
out := ""
|
start int
|
||||||
|
)
|
||||||
for i < len(str) && str[i] != '"' {
|
for i < len(str) && str[i] != '"' {
|
||||||
if str[i] == '\\' {
|
if str[i] == '\\' {
|
||||||
out += str[start:i]
|
head += str[start:i]
|
||||||
switch str[i+1] {
|
switch str[i+1] {
|
||||||
case '\\':
|
case '\\':
|
||||||
out += "\\"
|
head += "\\"
|
||||||
case 'r':
|
case 'r':
|
||||||
out += "\r"
|
head += "\r"
|
||||||
case 'n':
|
case 'n':
|
||||||
out += "\n"
|
head += "\n"
|
||||||
case 't':
|
case 't':
|
||||||
out += "\t"
|
head += "\t"
|
||||||
case '"':
|
case '"':
|
||||||
out += "\""
|
head += "\""
|
||||||
default:
|
default:
|
||||||
return nil, str
|
return "", str
|
||||||
}
|
}
|
||||||
i += 2
|
i += 2
|
||||||
start = i
|
start = i
|
||||||
|
|
@ -127,70 +130,74 @@ func getQuotedPart(str string) (*string, string) {
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
if i == len(str) {
|
if i == len(str) {
|
||||||
return nil, str
|
return "", str
|
||||||
}
|
}
|
||||||
out += str[start:i]
|
head += str[start:i]
|
||||||
i += 1
|
i += 1
|
||||||
var remainder string
|
switch {
|
||||||
if strings.HasPrefix(str[i:], "^^<") {
|
case strings.HasPrefix(str[i:], "^^<"):
|
||||||
// Ignore type, for now
|
// Ignore type, for now
|
||||||
_, remainder = getUriPart(str[i+3:])
|
_, tail = getUriPart(str[i+3:])
|
||||||
} else if strings.HasPrefix(str[i:], "@") {
|
case str[i] == '@':
|
||||||
_, remainder = getUnquotedPart(str[i+1:])
|
_, tail = getUnquotedPart(str[i+1:])
|
||||||
} else {
|
default:
|
||||||
remainder = str[i:]
|
tail = str[i:]
|
||||||
}
|
}
|
||||||
|
|
||||||
return &out, remainder
|
return head, tail
|
||||||
}
|
}
|
||||||
|
|
||||||
func getUnquotedPart(str string) (*string, string) {
|
func getUnquotedPart(str string) (head, tail string) {
|
||||||
i := 0
|
var (
|
||||||
initStr := str
|
i int
|
||||||
out := ""
|
initStr = str
|
||||||
start := 0
|
start int
|
||||||
for i < len(str) && !isWhitespace(str[i]) {
|
)
|
||||||
|
for i < len(str) && !isSpace(str[i]) {
|
||||||
if str[i] == '"' {
|
if str[i] == '"' {
|
||||||
part, remainder := getQuotedPart(str[i+1:])
|
part, remainder := getQuotedPart(str[i+1:])
|
||||||
if part == nil {
|
if part == "" {
|
||||||
return part, initStr
|
return part, initStr
|
||||||
}
|
}
|
||||||
out += str[start:i]
|
head += str[start:i]
|
||||||
str = remainder
|
str = remainder
|
||||||
i = 0
|
i = 0
|
||||||
start = 0
|
start = 0
|
||||||
out += *part
|
head += part
|
||||||
}
|
}
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
out += str[start:i]
|
head += str[start:i]
|
||||||
return &out, str[i:]
|
return head, str[i:]
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) {
|
type Decoder struct {
|
||||||
bf := bufio.NewReader(reader)
|
r *bufio.Reader
|
||||||
|
line []byte
|
||||||
|
}
|
||||||
|
|
||||||
nTriples := 0
|
func NewDecoder(r io.Reader) *Decoder {
|
||||||
line := ""
|
return &Decoder{r: bufio.NewReader(r)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dec *Decoder) Unmarshal() (*graph.Triple, error) {
|
||||||
|
dec.line = dec.line[:0]
|
||||||
for {
|
for {
|
||||||
l, pre, err := bf.ReadLine()
|
l, pre, err := dec.r.ReadLine()
|
||||||
if err == io.EOF {
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
dec.line = append(dec.line, l...)
|
||||||
|
if !pre {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if err != nil {
|
|
||||||
glog.Fatalln("Something bad happened while reading file " + err.Error())
|
|
||||||
}
|
|
||||||
line += string(l)
|
|
||||||
if pre {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
triple := Parse(line)
|
|
||||||
line = ""
|
|
||||||
if triple != nil {
|
|
||||||
nTriples++
|
|
||||||
c <- triple
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
glog.Infoln("Read", nTriples, "triples")
|
triple, err := Parse(string(dec.line))
|
||||||
close(c)
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
|
||||||
|
}
|
||||||
|
if triple == nil {
|
||||||
|
return dec.Unmarshal()
|
||||||
|
}
|
||||||
|
return triple, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,17 +25,26 @@ var testNTriples = []struct {
|
||||||
message string
|
message string
|
||||||
input string
|
input string
|
||||||
expect *graph.Triple
|
expect *graph.Triple
|
||||||
|
err error
|
||||||
}{
|
}{
|
||||||
// NTriple tests.
|
// NTriple tests.
|
||||||
{
|
{
|
||||||
message: "not parse invalid triples",
|
message: "not parse invalid triples",
|
||||||
input: "invalid",
|
input: "invalid",
|
||||||
expect: nil,
|
expect: nil,
|
||||||
|
err: ErrAbsentPredicate,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
message: "invalid internal quote",
|
||||||
|
input: `":103032" "/film/performance/character" "Walter "Teacher" Cole" .`,
|
||||||
|
expect: nil,
|
||||||
|
err: ErrUnterminated,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
message: "not parse comments",
|
message: "not parse comments",
|
||||||
input: "# nominally valid triple .",
|
input: "# nominally valid triple .",
|
||||||
expect: nil,
|
expect: nil,
|
||||||
|
err: nil,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
message: "parse simple triples",
|
message: "parse simple triples",
|
||||||
|
|
@ -110,7 +119,10 @@ var testNTriples = []struct {
|
||||||
|
|
||||||
func TestParse(t *testing.T) {
|
func TestParse(t *testing.T) {
|
||||||
for _, test := range testNTriples {
|
for _, test := range testNTriples {
|
||||||
got := Parse(test.input)
|
got, err := Parse(test.input)
|
||||||
|
if err != test.err {
|
||||||
|
t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err)
|
||||||
|
}
|
||||||
if !reflect.DeepEqual(got, test.expect) {
|
if !reflect.DeepEqual(got, test.expect) {
|
||||||
t.Errorf("Failed to %s, %q, got:%q expect:%q", test.message, test.input, got, test.expect)
|
t.Errorf("Failed to %s, %q, got:%q expect:%q", test.message, test.input, got, test.expect)
|
||||||
}
|
}
|
||||||
|
|
@ -121,6 +133,6 @@ var result *graph.Triple
|
||||||
|
|
||||||
func BenchmarkParser(b *testing.B) {
|
func BenchmarkParser(b *testing.B) {
|
||||||
for n := 0; n < b.N; n++ {
|
for n := 0; n < b.N; n++ {
|
||||||
result = Parse("<http://example/s> <http://example/p> \"object of some real\\tlength\"@en . # comment")
|
result, _ = Parse("<http://example/s> <http://example/p> \"object of some real\\tlength\"@en . # comment")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue