diff --git a/nquads/actions.rl b/nquads/actions.rl index a53fc93..cff580b 100644 --- a/nquads/actions.rl +++ b/nquads/actions.rl @@ -76,8 +76,12 @@ action Error { if p < len(data) { - return graph.Triple{}, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p) + if r := data[p]; r < unicode.MaxASCII { + return triple, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p) + } else { + return triple, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", ErrInvalid, data[p], data[p], p) + } } - return graph.Triple{}, ErrIncomplete + return triple, ErrIncomplete } }%% diff --git a/nquads/nquads.go b/nquads/nquads.go index b86bb4f..f24b4f2 100644 --- a/nquads/nquads.go +++ b/nquads/nquads.go @@ -30,12 +30,10 @@ var ( ErrUnterminated = errors.New("nqauds: unterminated quad") ) +// Parse returns a valid graph.Triple or a non-nil error. func Parse(str string) (*graph.Triple, error) { t, err := parse([]rune(str)) - if err != nil { - return nil, err - } - return &t, nil + return &t, err } type Decoder struct { diff --git a/nquads/nquads.rl b/nquads/nquads.rl index 1ca056c..990213a 100644 --- a/nquads/nquads.rl +++ b/nquads/nquads.rl @@ -31,7 +31,7 @@ | 0x3001 .. 0xd7ff | 0xf900 .. 0xfdcf | 0xfdf0 .. 0xfffd - | 0x10000 .. 0x1efff + | 0x10000 .. 0xeffff ; PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ; @@ -58,7 +58,9 @@ | 0x0e .. '!' | '#' .. '[' | ']' .. '~' - | ECHAR | UCHAR)* + | 0x80 .. 0x10ffff + | ECHAR + | UCHAR)* '"' ; @@ -69,7 +71,9 @@ | ']' | '_' | 'a' .. 'z' - | '~' | UCHAR)* + | '~' + | 0x80 .. 0x10ffff + | UCHAR)* '>' ; diff --git a/nquads/nquads_test.go b/nquads/nquads_test.go index eb5a548..c2eb3b1 100644 --- a/nquads/nquads_test.go +++ b/nquads/nquads_test.go @@ -15,6 +15,7 @@ package nquads import ( + "fmt" "reflect" "testing" @@ -29,6 +30,31 @@ var testNTriples = []struct { }{ // Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/. + // _:100000 . 
# example from 30movies + { + message: "parse triple with commment", + input: `_:100000 . # example from 30movies`, + expect: &graph.Triple{ + Subject: "_:100000", + Predicate: "", + Object: "", + Provenance: "", + }, + err: nil, + }, + // _:10011 "Tomás de Torquemada" . # example from 30movies with unicode + { + message: "parse triple with commment", + input: `_:10011 "Tomás de Torquemada" . # example from 30movies with unicode`, + expect: &graph.Triple{ + Subject: "_:10011", + Predicate: "", + Object: `"Tomás de Torquemada"`, + Provenance: "", + }, + err: nil, + }, + // N-Triples example 1. { message: "parse triple with commment", @@ -352,12 +378,49 @@ var testNTriples = []struct { }, err: nil, }, + + // Invalid input. + { + message: "parse empty", + input: ``, + expect: &graph.Triple{}, + err: ErrIncomplete, + }, + { + message: "parse commented", + input: `# comment`, + expect: &graph.Triple{}, + err: fmt.Errorf("%v: unexpected rune '#' at 0", ErrInvalid), + }, + { + message: "parse incomplete quad", + input: ` .`, + expect: &graph.Triple{ + Subject: "", + Predicate: "", + Object: "", + Provenance: "", + }, + err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid), + }, + { + message: "parse incomplete quad", + input: ` .`, + expect: &graph.Triple{ + Subject: "", + Predicate: "", + Object: "", + Provenance: "", + }, + err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid), + }, } func TestParse(t *testing.T) { for _, test := range testNTriples { got, err := Parse(test.input) - if err != test.err { + // Errors built with fmt.Errorf never compare equal by identity, so check nil-ness first and messages second; this also avoids calling Error on a nil expectation. + if (err == nil) != (test.err == nil) || (err != nil && err.Error() != test.err.Error()) { t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err) } if !reflect.DeepEqual(got, test.expect) { diff --git a/nquads/parse.rl b/nquads/parse.rl index fb97e25..6e73684 100644 --- a/nquads/parse.rl +++ b/nquads/parse.rl @@ -21,6 +21,7 @@ import ( "errors" "fmt" "strconv" + "unicode" "github.com/google/cayley/graph" )