Fix unicode handling and add tests

Parse now also returns the incomplete triple alongside a non-nil error to aid
debugging. When the error is non-nil, the triple is not usable except for
manual examination.
This commit is contained in:
kortschak 2014-07-25 10:46:10 +09:30
parent 59e3d620a5
commit dc17ccae80
5 changed files with 79 additions and 10 deletions

View file

@ -76,8 +76,12 @@
action Error {
	// Report why parsing stopped. The partially populated triple is
	// returned with the error so it can be examined when debugging;
	// it is not a valid result.
	if p < len(data) {
		// Input remains, so the rune at p was not accepted.
		r := data[p]
		if r <= unicode.MaxASCII {
			return triple, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, r, p)
		}
		// Non-ASCII rune: include the code point as well as the quoted
		// rune so the offending character can be identified.
		return triple, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", ErrInvalid, r, r, p)
	}
	// All input was consumed without an unexpected rune.
	return triple, ErrIncomplete
}
}%%

View file

@ -30,12 +30,10 @@ var (
ErrUnterminated = errors.New("nqauds: unterminated quad")
)
// Parse parses str and returns the resulting graph.Triple.
//
// The returned pointer is never nil. When the error is non-nil the triple
// holds whatever parse produced before it failed; it is returned only to
// aid debugging and must not be treated as a valid triple.
func Parse(str string) (*graph.Triple, error) {
	// parse operates on runes, so convert the string up front.
	t, err := parse([]rune(str))
	return &t, err
}
type Decoder struct {

View file

@ -31,7 +31,7 @@
| 0x3001 .. 0xd7ff
| 0xf900 .. 0xfdcf
| 0xfdf0 .. 0xfffd
| 0x10000 .. 0x1efff
| 0x10000 .. 0xeffff
;
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
@ -58,7 +58,9 @@
| 0x0e .. '!'
| '#' .. '['
| ']' .. '~'
| ECHAR | UCHAR)*
| 0x80 .. 0x10ffff
| ECHAR
| UCHAR)*
'"'
;
@ -69,7 +71,9 @@
| ']'
| '_'
| 'a' .. 'z'
| '~' | UCHAR)*
| '~'
| 0x80 .. 0x10ffff
| UCHAR)*
'>'
;

View file

@ -15,6 +15,7 @@
package nquads
import (
"fmt"
"reflect"
"testing"
@ -29,6 +30,31 @@ var testNTriples = []struct {
}{
// Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/.
// _:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies
{
message: "parse triple with commment",
input: `_:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies`,
expect: &graph.Triple{
Subject: "_:100000",
Predicate: "</film/performance/actor>",
Object: "</en/larry_fine_1902>",
Provenance: "",
},
err: nil,
},
// _:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode
{
message: "parse triple with commment",
input: `_:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode`,
expect: &graph.Triple{
Subject: "_:10011",
Predicate: "</film/performance/character>",
Object: `"Tomás de Torquemada"`,
Provenance: "",
},
err: nil,
},
// N-Triples example 1.
{
message: "parse triple with commment",
@ -352,12 +378,48 @@ var testNTriples = []struct {
},
err: nil,
},
// Invalid input.
{
message: "parse empty",
input: ``,
expect: &graph.Triple{},
err: ErrIncomplete,
},
{
message: "parse commented",
input: `# comment`,
expect: &graph.Triple{},
err: fmt.Errorf("%v: unexpected rune '#' at 0", ErrInvalid),
},
{
message: "parse incomplete quad",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "",
Provenance: "",
},
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
},
{
message: "parse incomplete quad",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "",
Provenance: "",
},
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
},
}
func TestParse(t *testing.T) {
for _, test := range testNTriples {
got, err := Parse(test.input)
if err != test.err {
if err != test.err && (err != nil && err.Error() != test.err.Error()) {
t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err)
}
if !reflect.DeepEqual(got, test.expect) {

View file

@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"strconv"
"unicode"
"github.com/google/cayley/graph"
)