// Copyright 2014 The Cayley Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package cquads import ( "archive/tar" "compress/gzip" "fmt" "io" "os" "path/filepath" "reflect" "strings" "testing" "github.com/google/cayley/quad" ) var testNQuads = []struct { message string input string expect quad.Quad err error }{ // Tests from original nquads. // NTriple tests. { message: "parse simple triples", input: "this is valid .", expect: quad.Quad{ Subject: "this", Predicate: "is", Object: "valid", Label: "", }, }, { message: "parse quoted triples", input: `this is "valid too" .`, expect: quad.Quad{ Subject: "this", Predicate: "is", Object: "valid too", Label: "", }, }, { message: "parse escaped quoted triples", input: `he said "\"That's all folks\"" .`, expect: quad.Quad{ Subject: "he", Predicate: "said", Object: `"That's all folks"`, Label: "", }, }, { message: "parse an example real triple", input: `":/guid/9202a8c04000641f80000000010c843c" "name" "George Morris" .`, expect: quad.Quad{ Subject: ":/guid/9202a8c04000641f80000000010c843c", Predicate: "name", Object: "George Morris", Label: "", }, }, { message: "parse a pathologically spaced triple", input: "foo is \"\\tA big tough\\r\\nDeal\\\\\" .", expect: quad.Quad{ Subject: "foo", Predicate: "is", Object: "\tA big tough\r\nDeal\\", Label: "", }, }, // NQuad tests. 
{ message: "parse a simple quad", input: "this is valid quad .", expect: quad.Quad{ Subject: "this", Predicate: "is", Object: "valid", Label: "quad", }, }, { message: "parse a quoted quad", input: `this is valid "quad thing" .`, expect: quad.Quad{ Subject: "this", Predicate: "is", Object: "valid", Label: "quad thing", }, }, { message: "parse crazy escaped quads", input: `"\"this" "\"is" "\"valid" "\"quad thing".`, expect: quad.Quad{ Subject: `"this`, Predicate: `"is`, Object: `"valid`, Label: `"quad thing`, }, }, // NTriple official tests. { message: "handle simple case with comments", input: " . # comment", expect: quad.Quad{ Subject: "http://example/s", Predicate: "http://example/p", Object: "http://example/o", Label: "", }, }, { message: "handle simple case with comments", input: " _:o . # comment", expect: quad.Quad{ Subject: "http://example/s", Predicate: "http://example/p", Object: "_:o", Label: "", }, }, { message: "handle simple case with comments", input: " \"o\" . # comment", expect: quad.Quad{ Subject: "http://example/s", Predicate: "http://example/p", Object: "o", Label: "", }, }, { message: "handle simple case with comments", input: " \"o\"^^ . # comment", expect: quad.Quad{ Subject: "http://example/s", Predicate: "http://example/p", Object: `"o"^^`, Label: "", }, }, { message: "handle simple case with comments", input: " \"o\"@en . # comment", expect: quad.Quad{ Subject: "http://example/s", Predicate: "http://example/p", Object: `"o"@en`, Label: ""}, }, // Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/. // _:100000 . # example from 30movies { message: "parse triple with commment", input: `_:100000 . # example from 30movies`, expect: quad.Quad{ Subject: "_:100000", Predicate: "/film/performance/actor", Object: "/en/larry_fine_1902", Label: "", }, err: nil, }, // _:10011 "Tomás de Torquemada" . # example from 30movies with unicode { message: "parse triple with commment", input: `_:10011 "Tomás de Torquemada" . 
# example from 30movies with unicode`, expect: quad.Quad{ Subject: "_:10011", Predicate: "/film/performance/character", Object: "Tomás de Torquemada", Label: "", }, err: nil, }, // N-Triples example 1. { message: "parse triple with commment", input: ` . # comments here`, expect: quad.Quad{ Subject: "http://one.example/subject1", Predicate: "http://one.example/predicate1", Object: "http://one.example/object1", Label: "", }, err: nil, }, { message: "parse triple with blank subject node, literal object and no comment (1)", input: `_:subject1 "object1" .`, expect: quad.Quad{ Subject: "_:subject1", Predicate: "http://an.example/predicate1", Object: "object1", Label: "", }, err: nil, }, { message: "parse triple with blank subject node, literal object and no comment (2)", input: `_:subject2 "object2" .`, expect: quad.Quad{ Subject: "_:subject2", Predicate: "http://an.example/predicate2", Object: "object2", Label: "", }, err: nil, }, // N-Triples example 2. { message: "parse triple with three IRIREFs", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/#spiderman", Predicate: "http://www.perceive.net/schemas/relationship/enemyOf", Object: "http://example.org/#green-goblin", Label: "", }, err: nil, }, // N-Triples example 3. { message: "parse triple with blank node labelled subject and object and IRIREF predicate (1)", input: `_:alice _:bob .`, expect: quad.Quad{ Subject: "_:alice", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "_:bob", Label: "", }, err: nil, }, { message: "parse triple with blank node labelled subject and object and IRIREF predicate (2)", input: `_:bob _:alice .`, expect: quad.Quad{ Subject: "_:bob", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "_:alice", Label: "", }, err: nil, }, // N-Quads example 1. { message: "parse quad with commment", input: ` . 
# comments here`, expect: quad.Quad{ Subject: "http://one.example/subject1", Predicate: "http://one.example/predicate1", Object: "http://one.example/object1", Label: "http://example.org/graph3", }, err: nil, }, { message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (1)", input: `_:subject1 "object1" .`, expect: quad.Quad{ Subject: "_:subject1", Predicate: "http://an.example/predicate1", Object: "object1", Label: "http://example.org/graph1", }, err: nil, }, { message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (2)", input: `_:subject2 "object2" .`, expect: quad.Quad{ Subject: "_:subject2", Predicate: "http://an.example/predicate2", Object: "object2", Label: "http://example.org/graph5", }, err: nil, }, // N-Quads example 2. { message: "parse quad with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/#spiderman", Predicate: "http://www.perceive.net/schemas/relationship/enemyOf", Object: "http://example.org/#green-goblin", Label: "http://example.org/graphs/spiderman", }, err: nil, }, // N-Quads example 3. { message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (1)", input: `_:alice _:bob .`, expect: quad.Quad{ Subject: "_:alice", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "_:bob", Label: "http://example.org/graphs/john", }, err: nil, }, { message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (2)", input: `_:bob _:alice .`, expect: quad.Quad{ Subject: "_:bob", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "_:alice", Label: "http://example.org/graphs/james", }, err: nil, }, // N-Triples tests. 
{ message: "parse triple with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", Object: "http://xmlns.com/foaf/0.1/Person", Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "http://example.org/alice#me", Label: "", }, err: nil, }, { message: "parse triple with IRIREF schema on literal object", input: ` "1990-07-04"^^ .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://schema.org/birthDate", Object: `"1990-07-04"^^`, Label: "", }, err: nil, }, { message: "parse commented IRIREF in triple", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://xmlns.com/foaf/0.1/topic_interest", Object: "http://www.wikidata.org/entity/Q12418", Label: "", }, err: nil, }, { message: "parse triple with literal subject", input: ` "Mona Lisa" .`, expect: quad.Quad{ Subject: "http://www.wikidata.org/entity/Q12418", Predicate: "http://purl.org/dc/terms/title", Object: "Mona Lisa", Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts (1)", input: ` .`, expect: quad.Quad{ Subject: "http://www.wikidata.org/entity/Q12418", Predicate: "http://purl.org/dc/terms/creator", Object: "http://dbpedia.org/resource/Leonardo_da_Vinci", Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts (2)", input: ` .`, expect: quad.Quad{ Subject: "http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619", Predicate: "http://purl.org/dc/terms/subject", Object: "http://www.wikidata.org/entity/Q12418", Label: "", }, err: nil, }, // N-Quads tests. 
{ message: "parse commented IRIREF in quad (1)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", Object: "http://xmlns.com/foaf/0.1/Person", Label: "http://example.org/bob", }, err: nil, }, { message: "parse quad with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://xmlns.com/foaf/0.1/knows", Object: "http://example.org/alice#me", Label: "http://example.org/bob", }, err: nil, }, { message: "parse quad with IRIREF schema on literal object", input: ` "1990-07-04"^^ .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://schema.org/birthDate", Object: `"1990-07-04"^^`, Label: "http://example.org/bob", }, err: nil, }, { message: "parse commented IRIREF in quad (2)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://xmlns.com/foaf/0.1/topic_interest", Object: "http://www.wikidata.org/entity/Q12418", Label: "http://example.org/bob", }, err: nil, }, { message: "parse literal object and colon qualified label in quad", input: ` "Mona Lisa" .`, expect: quad.Quad{ Subject: "http://www.wikidata.org/entity/Q12418", Predicate: "http://purl.org/dc/terms/title", Object: "Mona Lisa", Label: "https://www.wikidata.org/wiki/Special:EntityData/Q12418", }, err: nil, }, { message: "parse all IRIREF parts with colon qualified label in quad (1)", input: ` .`, expect: quad.Quad{ Subject: "http://www.wikidata.org/entity/Q12418", Predicate: "http://purl.org/dc/terms/creator", Object: "http://dbpedia.org/resource/Leonardo_da_Vinci", Label: "https://www.wikidata.org/wiki/Special:EntityData/Q12418", }, err: nil, }, { message: "parse all IRIREF parts with colon qualified label in quad (2)", input: ` .`, expect: quad.Quad{ Subject: "http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619", Predicate: "http://purl.org/dc/terms/subject", Object: 
"http://www.wikidata.org/entity/Q12418", Label: "https://www.wikidata.org/wiki/Special:EntityData/Q12418", }, err: nil, }, { message: "parse all IRIREF parts (quad section - 1)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob", Predicate: "http://purl.org/dc/terms/publisher", Object: "http://example.org", Label: "", }, err: nil, }, { message: "parse all IRIREF parts (quad section - 2)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob", Predicate: "http://purl.org/dc/terms/rights", Object: "http://creativecommons.org/licenses/by/3.0/", Label: "", }, err: nil, }, // Invalid input. { message: "parse empty", input: ``, expect: quad.Quad{}, err: quad.ErrIncomplete, }, { message: "parse commented", input: `# is a comment`, expect: quad.Quad{}, err: fmt.Errorf("%v: unexpected rune '#' at 0", quad.ErrInvalid), }, { message: "parse commented internal (1)", input: `is # a comment`, expect: quad.Quad{Subject: "is"}, err: fmt.Errorf("%v: unexpected rune '#' at 3", quad.ErrInvalid), }, { message: "parse commented internal (2)", input: `is a # comment`, expect: quad.Quad{Subject: "is", Predicate: "a"}, err: fmt.Errorf("%v: unexpected rune '#' at 5", quad.ErrInvalid), }, { message: "parse incomplete quad (1)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", Object: "", Label: "", }, err: quad.ErrIncomplete, }, { message: "parse incomplete quad (2)", input: ` .`, expect: quad.Quad{ Subject: "http://example.org/bob#me", Predicate: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", Object: "", Label: "", }, err: quad.ErrIncomplete, }, // Example quad from issue #140 in two forms: strict N-Quads and as quoted in issue. 
{ message: "parse incomplete quad", input: "\t\t.", expect: quad.Quad{ Subject: "ns:m.0y_chx", Predicate: "ns:music.recording.lyrics_website..common.webpage.uri", Object: ".", expect: quad.Quad{ Subject: "ns:m.0y_chx", Predicate: "ns:music.recording.lyrics_website..common.webpage.uri", Object: " . _:100001 . _:100002 . _:100003 . _:100004 . _:100005 . _:100006 . _:100007 . _:100008 . _:100009 . #last ten lines of 30kmoviedata.nq "Bill Fishman" . . "Matthew J. Evans" . . "Nina Bonherry" . . "Bill Roberts" . . "Christopher Ashley" . . `

// NOTE(review): the line above appears to be the residue of a damaged table
// entry and sample document (text between '<' and '>' looks to have been
// stripped) — restore it from version control before relying on it.

// TestDecoder streams the sample document through a Decoder and checks that
// every decoded quad has a non-empty subject, predicate and object, and that
// exactly 20 quads are read before io.EOF.
func TestDecoder(t *testing.T) {
	dec := NewDecoder(strings.NewReader(document))
	var n int
	for {
		q, err := dec.Unmarshal()
		if err != nil {
			if err != io.EOF {
				t.Fatalf("Failed to read document: %v", err)
			}
			break
		}
		if q.Subject == "" || q.Predicate == "" || q.Object == "" {
			t.Errorf("Unexpected quad, got:%v", q)
		}
		n++
	}
	if n != 20 {
		t.Errorf("Unexpected number of quads read, got:%d expect:20", n)
	}
}

// TestRDFWorkingGroupSuit runs the decoder over every .nt and .nq file in the
// N-Triples and N-Quads test archives located one directory up. Files whose
// name contains "bad" are expected to produce an error; all others are
// expected to decode cleanly. Files named in skip are not checked.
func TestRDFWorkingGroupSuit(t *testing.T) {
	// Tests that are not passable by cquads parsing from the RDF
	// Working Group Suite:
	//
	// [1] Because we don't require literal quoting, we cannot
	//     distinguish quad terms without separating whitespace.
	//
	// [2] The cquads grammar accepts these because of its relaxation.
	//
	// [3] These tests pass because the parser does not perform
	//     semantic testing on the URI in the IRIRef as required by
	//     the specification.
	skip := map[string]bool{
		// N-Triples.
		// [1]
		"minimal_whitespace.nt": true,
		// [2]
		"nt-syntax-bad-num-01.nt":    true,
		"nt-syntax-bad-num-02.nt":    true,
		"nt-syntax-bad-num-03.nt":    true,
		"nt-syntax-bad-prefix-01.nt": true,
		"nt-syntax-bad-string-02.nt": true,
		"nt-syntax-bad-string-03.nt": true,
		"nt-syntax-bad-string-04.nt": true,
		"nt-syntax-bad-struct-01.nt": true,
		"nt-syntax-bad-uri-01.nt":    true,
		"nt-syntax-bad-uri-04.nt":    true,
		// [3]
		"nt-syntax-bad-uri-06.nt": true,
		"nt-syntax-bad-uri-07.nt": true,
		"nt-syntax-bad-uri-08.nt": true,
		"nt-syntax-bad-uri-09.nt": true,

		// N-Quads.
		// [1]
		"minimal_whitespace.nq": true,
		// [2]
		"nq-syntax-bad-literal-01.nq": true,
		"nq-syntax-bad-literal-02.nq": true,
		"nq-syntax-bad-literal-03.nq": true,
		"nt-syntax-bad-num-01.nq":     true,
		"nt-syntax-bad-num-02.nq":     true,
		"nt-syntax-bad-num-03.nq":     true,
		"nt-syntax-bad-prefix-01.nq":  true,
		"nt-syntax-bad-string-02.nq":  true,
		"nt-syntax-bad-string-03.nq":  true,
		"nt-syntax-bad-string-04.nq":  true,
		"nt-syntax-bad-struct-01.nq":  true,
		"nt-syntax-bad-uri-01.nq":     true,
		"nt-syntax-bad-uri-04.nq":     true,
		// [3]
		"nq-syntax-bad-uri-01.nq": true,
		"nt-syntax-bad-uri-06.nq": true,
		"nt-syntax-bad-uri-07.nq": true,
		"nt-syntax-bad-uri-08.nq": true,
		"nt-syntax-bad-uri-09.nq": true,
	}

	for _, file := range []string{
		filepath.Join("..", "ntriple_tests.tar.gz"),
		filepath.Join("..", "nquad_tests.tar.gz"),
	} {
		checkSuiteArchive(t, file, skip)
	}
}

// checkSuiteArchive opens the gzipped tar archive at file and decodes each
// .nt/.nq member not listed in skip, reporting a test error whenever the
// decoder's success or failure disagrees with the "bad" marker in the member
// name. It is a separate function so that the deferred closes run once per
// archive rather than piling up until the whole test returns.
func checkSuiteArchive(t *testing.T, file string, skip map[string]bool) {
	suite, err := os.Open(file)
	if err != nil {
		t.Fatalf("Failed to open test suite in %q: %v", file, err)
	}
	defer suite.Close()

	r, err := gzip.NewReader(suite)
	if err != nil {
		t.Fatalf("Failed to uncompress test suite in %q: %v", file, err)
	}
	// The stream is only read from, so a close error cannot change any
	// result already reported; it is intentionally not checked.
	defer r.Close()

	tr := tar.NewReader(r)
	for {
		h, err := tr.Next()
		if err == io.EOF {
			return
		}
		if err != nil {
			t.Fatalf("Unexpected error while reading suite archive: %v", err)
		}

		// Only the base name matters: it selects the file type, the skip
		// entry and whether the case is a negative ("bad") test.
		name := filepath.Base(h.Name)
		if ext := filepath.Ext(name); (ext != ".nt" && ext != ".nq") || skip[name] {
			continue
		}
		isBad := strings.Contains(name, "bad")

		dec := NewDecoder(tr)
		for {
			_, err := dec.Unmarshal()
			if err == io.EOF {
				break
			}
			// A clean decode of a "bad" file, or a failed decode of a
			// good one, is a mismatch.
			got := err == nil
			if got == isBad {
				t.Errorf("Unexpected error return for test suite item %q, got: %v", name, err)
			}
		}
	}
}

// escapeSequenceTests pairs escaped input text with the string that
// TestUnescape expects unEscape to return for it.
var escapeSequenceTests = []struct {
	input  string
	expect string
}{
	{input: `\t`, expect: "\t"},
	{input: `\b`, expect: "\b"},
	{input: `\n`, expect: "\n"},
	{input: `\r`, expect: "\r"},
	{input: `\f`, expect: "\f"},
	{input: `\\`, expect: "\\"},
	{input: `\u00b7`, expect: "·"},
	{input: `\U000000b7`, expect: "·"},
	{input: `\t\u00b7`, expect: "\t·"},
	{input: `\b\U000000b7`, expect: "\b·"},
	{input: `\u00b7\n`, expect: "·\n"},
{input: `\U000000b7\r`, expect: "·\r"},
	{input: `\u00b7\f\U000000b7`, expect: "·\f·"},
	{input: `\U000000b7\\\u00b7`, expect: "·\\·"},
}

// TestUnescape feeds every escapeSequenceTests input to unEscape (as runes,
// with the flag arguments false and true) and reports any case whose result
// differs from the expected string.
func TestUnescape(t *testing.T) {
	for i := range escapeSequenceTests {
		tc := escapeSequenceTests[i]
		if out := unEscape([]rune(tc.input), false, true); out != tc.expect {
			t.Errorf("Failed to properly unescape %q, got:%q expect:%q", tc.input, out, tc.expect)
		}
	}
}

// result is a package-level sink assigned by BenchmarkParser; presumably it
// exists so the benchmarked call's value is observably used.
var result quad.Quad

// BenchmarkParser calls Parse on one fixed line b.N times. The error returned
// by Parse is discarded; only the quad is stored, in result.
//
// NOTE(review): the input begins with a literal and carries no subject or
// predicate term — it may have lost text; confirm against version control.
func BenchmarkParser(b *testing.B) {
	const line = " \"object of some real\\tlength\"@en . # comment"
	for remaining := b.N; remaining > 0; remaining-- {
		result, _ = Parse(line)
	}
}