Fix unicode handling and add tests
Parsing now also returns the incomplete triple alongside a non-nil error to aid in debugging; when the error is non-nil, the returned triple is not usable except for manual examination.
This commit is contained in:
parent
59e3d620a5
commit
dc17ccae80
5 changed files with 79 additions and 10 deletions
|
|
@ -76,8 +76,12 @@
|
||||||
|
|
||||||
action Error {
|
action Error {
|
||||||
if p < len(data) {
|
if p < len(data) {
|
||||||
return graph.Triple{}, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p)
|
if r := data[p]; r < unicode.MaxASCII {
|
||||||
|
return triple, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p)
|
||||||
|
} else {
|
||||||
|
return triple, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", ErrInvalid, data[p], data[p], p)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return graph.Triple{}, ErrIncomplete
|
return triple, ErrIncomplete
|
||||||
}
|
}
|
||||||
}%%
|
}%%
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,10 @@ var (
|
||||||
ErrUnterminated = errors.New("nqauds: unterminated quad")
|
ErrUnterminated = errors.New("nqauds: unterminated quad")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
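// Parse returns a valid graph.Triple or a non-nil error; when the error is non-nil the returned triple may be incomplete and is only useful for manual examination.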
|
||||||
func Parse(str string) (*graph.Triple, error) {
|
func Parse(str string) (*graph.Triple, error) {
|
||||||
t, err := parse([]rune(str))
|
t, err := parse([]rune(str))
|
||||||
if err != nil {
|
return &t, err
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &t, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Decoder struct {
|
type Decoder struct {
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
| 0x3001 .. 0xd7ff
|
| 0x3001 .. 0xd7ff
|
||||||
| 0xf900 .. 0xfdcf
|
| 0xf900 .. 0xfdcf
|
||||||
| 0xfdf0 .. 0xfffd
|
| 0xfdf0 .. 0xfffd
|
||||||
| 0x10000 .. 0x1efff
|
| 0x10000 .. 0xeffff
|
||||||
;
|
;
|
||||||
|
|
||||||
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
|
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
|
||||||
|
|
@ -58,7 +58,9 @@
|
||||||
| 0x0e .. '!'
|
| 0x0e .. '!'
|
||||||
| '#' .. '['
|
| '#' .. '['
|
||||||
| ']' .. '~'
|
| ']' .. '~'
|
||||||
| ECHAR | UCHAR)*
|
| 0x80 .. 0x10ffff
|
||||||
|
| ECHAR
|
||||||
|
| UCHAR)*
|
||||||
'"'
|
'"'
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
@ -69,7 +71,9 @@
|
||||||
| ']'
|
| ']'
|
||||||
| '_'
|
| '_'
|
||||||
| 'a' .. 'z'
|
| 'a' .. 'z'
|
||||||
| '~' | UCHAR)*
|
| '~'
|
||||||
|
| 0x80 .. 0x10ffff
|
||||||
|
| UCHAR)*
|
||||||
'>'
|
'>'
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
package nquads
|
package nquads
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
|
@ -29,6 +30,31 @@ var testNTriples = []struct {
|
||||||
}{
|
}{
|
||||||
// Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/.
|
// Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/.
|
||||||
|
|
||||||
|
// _:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies
|
||||||
|
{
|
||||||
|
message: "parse triple with commment",
|
||||||
|
input: `_:100000 </film/performance/actor> </en/larry_fine_1902> . # example from 30movies`,
|
||||||
|
expect: &graph.Triple{
|
||||||
|
Subject: "_:100000",
|
||||||
|
Predicate: "</film/performance/actor>",
|
||||||
|
Object: "</en/larry_fine_1902>",
|
||||||
|
Provenance: "",
|
||||||
|
},
|
||||||
|
err: nil,
|
||||||
|
},
|
||||||
|
// _:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode
|
||||||
|
{
|
||||||
|
message: "parse triple with commment",
|
||||||
|
input: `_:10011 </film/performance/character> "Tomás de Torquemada" . # example from 30movies with unicode`,
|
||||||
|
expect: &graph.Triple{
|
||||||
|
Subject: "_:10011",
|
||||||
|
Predicate: "</film/performance/character>",
|
||||||
|
Object: `"Tomás de Torquemada"`,
|
||||||
|
Provenance: "",
|
||||||
|
},
|
||||||
|
err: nil,
|
||||||
|
},
|
||||||
|
|
||||||
// N-Triples example 1.
|
// N-Triples example 1.
|
||||||
{
|
{
|
||||||
message: "parse triple with commment",
|
message: "parse triple with commment",
|
||||||
|
|
@ -352,12 +378,48 @@ var testNTriples = []struct {
|
||||||
},
|
},
|
||||||
err: nil,
|
err: nil,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Invalid input.
|
||||||
|
{
|
||||||
|
message: "parse empty",
|
||||||
|
input: ``,
|
||||||
|
expect: &graph.Triple{},
|
||||||
|
err: ErrIncomplete,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
message: "parse commented",
|
||||||
|
input: `# comment`,
|
||||||
|
expect: &graph.Triple{},
|
||||||
|
err: fmt.Errorf("%v: unexpected rune '#' at 0", ErrInvalid),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
message: "parse incomplete quad",
|
||||||
|
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
|
||||||
|
expect: &graph.Triple{
|
||||||
|
Subject: "<http://example.org/bob#me>",
|
||||||
|
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
|
||||||
|
Object: "",
|
||||||
|
Provenance: "",
|
||||||
|
},
|
||||||
|
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
message: "parse incomplete quad",
|
||||||
|
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> .`,
|
||||||
|
expect: &graph.Triple{
|
||||||
|
Subject: "<http://example.org/bob#me>",
|
||||||
|
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
|
||||||
|
Object: "",
|
||||||
|
Provenance: "",
|
||||||
|
},
|
||||||
|
err: fmt.Errorf("%v: unexpected rune '.' at 78", ErrInvalid),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParse(t *testing.T) {
|
func TestParse(t *testing.T) {
|
||||||
for _, test := range testNTriples {
|
for _, test := range testNTriples {
|
||||||
got, err := Parse(test.input)
|
got, err := Parse(test.input)
|
||||||
if err != test.err {
|
if err != test.err && (err != nil && err.Error() != test.err.Error()) {
|
||||||
t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err)
|
t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err)
|
||||||
}
|
}
|
||||||
if !reflect.DeepEqual(got, test.expect) {
|
if !reflect.DeepEqual(got, test.expect) {
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
"github.com/google/cayley/graph"
|
"github.com/google/cayley/graph"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue