From d76213fb2d49e4e8b1c8ef4126308a77e3090428 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 25 Jul 2014 11:22:24 +0930 Subject: [PATCH] Handle comments in N-Quad documents and REPL The parser rejects an N-Quad with a comment, so we filter those out ahead of time. This simplifies the grammar and code generated by the parser. --- db/repl.go | 5 +++++ nquads/nquads.go | 20 ++++++++++++++------ nquads/nquads_test.go | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/db/repl.go b/db/repl.go index 9b918ed..a42ff6d 100644 --- a/db/repl.go +++ b/db/repl.go @@ -99,6 +99,11 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error if len(line) == 0 { continue } + line = bytes.TrimSpace(line) + if len(line) == 0 || line[0] == '#' { + line = line[:0] + continue + } if bytes.HasPrefix(line, []byte(":debug")) { ses.ToggleDebug() fmt.Println("Debug Toggled") diff --git a/nquads/nquads.go b/nquads/nquads.go index f24b4f2..d0570a6 100644 --- a/nquads/nquads.go +++ b/nquads/nquads.go @@ -16,6 +16,7 @@ package nquads import ( "bufio" + "bytes" "errors" "fmt" "io" @@ -47,17 +48,24 @@ func NewDecoder(r io.Reader) *Decoder { func (dec *Decoder) Unmarshal() (*graph.Triple, error) { dec.line = dec.line[:0] + var line []byte for { - l, pre, err := dec.r.ReadLine() - if err != nil { - return nil, err + for { + l, pre, err := dec.r.ReadLine() + if err != nil { + return nil, err + } + dec.line = append(dec.line, l...) + if !pre { + break + } } - dec.line = append(dec.line, l...) 
- if !pre { + if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' { break } + dec.line = dec.line[:0] } - triple, err := Parse(string(dec.line)) + triple, err := Parse(string(line)) if err != nil { return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err) } diff --git a/nquads/nquads_test.go b/nquads/nquads_test.go index c2eb3b1..328c1e8 100644 --- a/nquads/nquads_test.go +++ b/nquads/nquads_test.go @@ -16,7 +16,9 @@ package nquads import ( "fmt" + "io" "reflect" + "strings" "testing" "github.com/google/cayley/graph" @@ -428,6 +430,56 @@ func TestParse(t *testing.T) { } } +// This is a sample taken from 30kmovies.nq. +// It has intentional defects: +// The second comment is inset one space and +// the second line after that comment is blank. +var document = `# first 10 lines of 30kmovies.nq +_:100000 . +_:100001 . +_:100002 . +_:100003 . +_:100004 . +_:100005 . +_:100006 . +_:100007 . +_:100008 . +_:100009 . + #last ten lines of 30kmovies.nq + "Bill Fishman" . + + . + "Matthew J. Evans" . + . + "Nina Bonherry" . + . + "Bill Roberts" . + . + "Christopher Ashley" . + . +` + +func TestDecoder(t *testing.T) { + dec := NewDecoder(strings.NewReader(document)) + var n int + for { + triple, err := dec.Unmarshal() + if err != nil { + if err != io.EOF { + t.Fatalf("Failed to read document:", err) + } + break + } + if triple.Subject == "" || triple.Predicate == "" || triple.Object == "" { + t.Errorf("Unexpected triple, got:%v", triple) + } + n++ + } + if n != 20 { + t.Errorf("Unexpected number of triples read, got:%d expect:20", n) + } +} + var result *graph.Triple func BenchmarkParser(b *testing.B) {