Fix unicode handling and add tests

On a parse failure we now also return the partially parsed triple to aid
debugging. A non-nil error means the returned triple is incomplete and is not
usable except for manual examination.
This commit is contained in:
kortschak 2014-07-25 10:46:10 +09:30
parent 59e3d620a5
commit dc17ccae80
5 changed files with 79 additions and 10 deletions

View file

@ -76,8 +76,12 @@
action Error { action Error {
if p < len(data) { if p < len(data) {
return graph.Triple{}, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p) if r := data[p]; r < unicode.MaxASCII {
return triple, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p)
} else {
return triple, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", ErrInvalid, data[p], data[p], p)
}
} }
return graph.Triple{}, ErrIncomplete return triple, ErrIncomplete
} }
}%% }%%

View file

@ -30,12 +30,10 @@ var (
ErrUnterminated = errors.New("nqauds: unterminated quad") ErrUnterminated = errors.New("nqauds: unterminated quad")
) )
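// Parse returns a valid graph.Triple or a non-nil error. When the error is
// non-nil the returned triple may be incomplete and is intended only for
// manual examination while debugging.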
func Parse(str string) (*graph.Triple, error) { func Parse(str string) (*graph.Triple, error) {
t, err := parse([]rune(str)) t, err := parse([]rune(str))
if err != nil { return &t, err
return nil, err
}
return &t, nil
} }
type Decoder struct { type Decoder struct {

View file

@ -31,7 +31,7 @@
| 0x3001 .. 0xd7ff | 0x3001 .. 0xd7ff
| 0xf900 .. 0xfdcf | 0xf900 .. 0xfdcf
| 0xfdf0 .. 0xfffd | 0xfdf0 .. 0xfffd
| 0x10000 .. 0x1efff | 0x10000 .. 0xeffff
; ;
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ; PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
@ -58,7 +58,9 @@
| 0x0e .. '!' | 0x0e .. '!'
| '#' .. '[' | '#' .. '['
| ']' .. '~' | ']' .. '~'
| ECHAR | UCHAR)* | 0x80 .. 0x10ffff
| ECHAR
| UCHAR)*
'"' '"'
; ;
@ -69,7 +71,9 @@
| ']' | ']'
| '_' | '_'
| 'a' .. 'z' | 'a' .. 'z'
| '~' | UCHAR)* | '~'
| 0x80 .. 0x10ffff
| UCHAR)*
'>' '>'
; ;

View file

@ -15,6 +15,7 @@
package nquads package nquads
import ( import (
"fmt"
"reflect" "reflect"
"testing" "testing"
@ -29,6 +30,31 @@ var testNTriples = []struct {
}{ }{
// Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/. // Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/.
// _:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies
{
message: "parse triple with commment",
input: `_:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies`,
expect: &graph.Triple{
Subject: "_:100000",
Predicate: "</film/performance/actor>",
Object: "</en/larry_fine_1902>",
Provenance: "",
},
err: nil,
},
// _:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode
{
message: "parse triple with commment",
input: `_:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode`,
expect: &graph.Triple{
Subject: "_:10011",
Predicate: "</film/performance/character>",
Object: `"Tomás de Torquemada"`,
Provenance: "",
},
err: nil,
},
// N-Triples example 1. // N-Triples example 1.
{ {
message: "parse triple with commment", message: "parse triple with commment",
@ -352,12 +378,48 @@ var testNTriples = []struct {
}, },
err: nil, err: nil,
}, },
// Invalid input.
{
message: "parse empty",
input: ``,
expect: &graph.Triple{},
err: ErrIncomplete,
},
{
message: "parse commented",
input: `# comment`,
expect: &graph.Triple{},
err: fmt.Errorf("%v: unexpected rune '#' at 0", ErrInvalid),
},
{
message: "parse incomplete quad",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "",
Provenance: "",
},
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
},
{
message: "parse incomplete quad",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "",
Provenance: "",
},
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
},
} }
func TestParse(t *testing.T) { func TestParse(t *testing.T) {
for _, test := range testNTriples { for _, test := range testNTriples {
got, err := Parse(test.input) got, err := Parse(test.input)
if err != test.err { if err != test.err && (err != nil && err.Error() != test.err.Error()) {
t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err) t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err)
} }
if !reflect.DeepEqual(got, test.expect) { if !reflect.DeepEqual(got, test.expect) {

View file

@ -21,6 +21,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"strconv" "strconv"
"unicode"
"github.com/google/cayley/graph" "github.com/google/cayley/graph"
) )