Handle comments in N-Quad documents and REPL

The parser rejects an N-Quad with a comment, so we filter those out
ahead of time. This simplifies the grammar and code generated by the
parser.
This commit is contained in:
kortschak 2014-07-25 11:22:24 +09:30
parent dc17ccae80
commit d76213fb2d
3 changed files with 71 additions and 6 deletions

View file

@ -99,6 +99,11 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
if len(line) == 0 {
continue
}
line = bytes.TrimSpace(line)
if len(line) == 0 || line[0] == '#' {
line = line[:0]
continue
}
if bytes.HasPrefix(line, []byte(":debug")) {
ses.ToggleDebug()
fmt.Println("Debug Toggled")

View file

@ -16,6 +16,7 @@ package nquads
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
@ -47,17 +48,24 @@ func NewDecoder(r io.Reader) *Decoder {
func (dec *Decoder) Unmarshal() (*graph.Triple, error) {
dec.line = dec.line[:0]
var line []byte
for {
l, pre, err := dec.r.ReadLine()
if err != nil {
return nil, err
for {
l, pre, err := dec.r.ReadLine()
if err != nil {
return nil, err
}
dec.line = append(dec.line, l...)
if !pre {
break
}
}
dec.line = append(dec.line, l...)
if !pre {
if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' {
break
}
dec.line = dec.line[:0]
}
triple, err := Parse(string(dec.line))
triple, err := Parse(string(line))
if err != nil {
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
}

View file

@ -16,7 +16,9 @@ package nquads
import (
"fmt"
"io"
"reflect"
"strings"
"testing"
"github.com/google/cayley/graph"
@ -428,6 +430,56 @@ func TestParse(t *testing.T) {
}
}
// This is a sample taken from 30kmovies.nq.
// It has intentional defects:
// The second comment is inset one space and
// the second line after that comment is blank.
var document = `# first 10 lines of 30kmovies.nq
_:100000 </film/performance/actor> </en/larry_fine_1902> .
_:100001 </film/performance/actor> </en/samuel_howard> .
_:100002 </film/performance/actor> </en/joe_palma> .
_:100003 </film/performance/actor> </en/symona_boniface> .
_:100004 </film/performance/actor> </en/dudley_dickerson> .
_:100005 </film/performance/actor> </guid/9202a8c04000641f8000000006ec181a> .
_:100006 </film/performance/actor> </en/emil_sitka> .
_:100007 </film/performance/actor> </en/christine_mcintyre> .
_:100008 </film/performance/actor> </en/moe_howard> .
_:100009 </film/performance/actor> </en/larry_fine_1902> .
#last ten lines of 30kmovies.nq
</guid/9202a8c04000641f800000001473e673> <name> "Bill Fishman" .
</guid/9202a8c04000641f800000001473e673> <type> </people/person> .
</guid/9202a8c04000641f800000001474a221> <name> "Matthew J. Evans" .
</guid/9202a8c04000641f800000001474a221> <type> </people/person> .
</guid/9202a8c04000641f800000001474f486> <name> "Nina Bonherry" .
</guid/9202a8c04000641f800000001474f486> <type> </people/person> .
</user/basketball_loader/basketballdatabase_namespace/ROBERBI01> <name> "Bill Roberts" .
</user/basketball_loader/basketballdatabase_namespace/ROBERBI01> <type> </people/person> .
</user/jamie/nytdataid/N17971793050606542713> <name> "Christopher Ashley" .
</user/jamie/nytdataid/N17971793050606542713> <type> </people/person> .
`
// TestDecoder reads the sample document through a Decoder until io.EOF and
// verifies that every returned triple has a non-empty Subject, Predicate and
// Object, and that exactly 20 triples are produced.
func TestDecoder(t *testing.T) {
	dec := NewDecoder(strings.NewReader(document))
	var n int
	for {
		triple, err := dec.Unmarshal()
		if err != nil {
			// io.EOF marks the normal end of input; anything else is a failure.
			if err != io.EOF {
				t.Fatalf("Failed to read document: %v", err)
			}
			break
		}
		if triple.Subject == "" || triple.Predicate == "" || triple.Object == "" {
			t.Errorf("Unexpected triple, got:%v", triple)
		}
		n++
	}
	// document holds 20 statement lines; its '#' comment lines must not be
	// counted as triples.
	if n != 20 {
		t.Errorf("Unexpected number of triples read, got:%d expect:20", n)
	}
}
var result *graph.Triple
func BenchmarkParser(b *testing.B) {