// Copyright 2014 The Cayley Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package nquads import ( "archive/tar" "compress/gzip" "fmt" "io" "os" "path/filepath" "reflect" "strings" "testing" "github.com/google/cayley/quad" ) var testNQuads = []struct { message string input string expect quad.Quad err error }{ // Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/. // _:100000 . # example from 30movies { message: "parse triple with commment", input: `_:100000 . # example from 30movies`, expect: quad.Quad{ Subject: "_:100000", Predicate: "", Object: "", Label: "", }, err: nil, }, // _:10011 "Tomás de Torquemada" . # example from 30movies with unicode { message: "parse triple with commment", input: `_:10011 "Tomás de Torquemada" . # example from 30movies with unicode`, expect: quad.Quad{ Subject: "_:10011", Predicate: "", Object: `"Tomás de Torquemada"`, Label: "", }, err: nil, }, // N-Triples example 1. { message: "parse triple with commment", input: ` . # comments here`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse triple with blank subject node, literal object and no comment (1)", input: `_:subject1 "object1" .`, expect: quad.Quad{ Subject: "_:subject1", Predicate: "", Object: `"object1"`, Label: "", }, err: nil, }, { message: "parse triple with blank subject node, literal object and no comment (2)", input: `_:subject2 "object2" .`, expect: quad.Quad{ Subject: "_:subject2", Predicate: "", Object: `"object2"`, Label: "", }, err: nil, }, // N-Triples example 2. { message: "parse triple with three IRIREFs", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, // N-Triples example 3. { message: "parse triple with blank node labelled subject and object and IRIREF predicate (1)", input: `_:alice _:bob .`, expect: quad.Quad{ Subject: "_:alice", Predicate: "", Object: "_:bob", Label: "", }, err: nil, }, { message: "parse triple with blank node labelled subject and object and IRIREF predicate (2)", input: `_:bob _:alice .`, expect: quad.Quad{ Subject: "_:bob", Predicate: "", Object: "_:alice", Label: "", }, err: nil, }, // N-Quads example 1. { message: "parse quad with commment", input: ` . # comments here`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (1)", input: `_:subject1 "object1" .`, expect: quad.Quad{ Subject: "_:subject1", Predicate: "", Object: `"object1"`, Label: "", }, err: nil, }, { message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (2)", input: `_:subject2 "object2" .`, expect: quad.Quad{ Subject: "_:subject2", Predicate: "", Object: `"object2"`, Label: "", }, err: nil, }, // N-Quads example 2. { message: "parse quad with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, // N-Quads example 3. { message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (1)", input: `_:alice _:bob .`, expect: quad.Quad{ Subject: "_:alice", Predicate: "", Object: "_:bob", Label: "", }, err: nil, }, { message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (2)", input: `_:bob _:alice .`, expect: quad.Quad{ Subject: "_:bob", Predicate: "", Object: "_:alice", Label: "", }, err: nil, }, // N-Triples tests. { message: "parse triple with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse triple with IRIREF schema on literal object", input: ` "1990-07-04"^^ .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: `"1990-07-04"^^`, Label: "", }, err: nil, }, { message: "parse commented IRIREF in triple", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse triple with literal subject", input: ` "Mona Lisa" .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: `"Mona Lisa"`, Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts (1)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse triple with all IRIREF parts (2)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, // N-Quads tests. { message: "parse commented IRIREF in quad (1)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse quad with all IRIREF parts", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse quad with IRIREF schema on literal object", input: ` "1990-07-04"^^ .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: `"1990-07-04"^^`, Label: "", }, err: nil, }, { message: "parse commented IRIREF in quad (2)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse literal object and colon qualified label in quad", input: ` "Mona Lisa" .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: `"Mona Lisa"`, Label: "", }, err: nil, }, { message: "parse all IRIREF parts with colon qualified label in quad (1)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse all IRIREF parts with colon qualified label in quad (2)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse all IRIREF parts (quad section - 1)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, { message: "parse all IRIREF parts (quad section - 2)", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: nil, }, // Invalid input. { message: "parse empty", input: ``, expect: quad.Quad{}, err: quad.ErrIncomplete, }, { message: "parse commented", input: `# comment`, expect: quad.Quad{}, err: fmt.Errorf("%v: unexpected rune '#' at 0", quad.ErrInvalid), }, { message: "parse incomplete quad", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: fmt.Errorf("%v: unexpected rune '.' at 78", quad.ErrInvalid), }, { message: "parse incomplete quad", input: ` .`, expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: fmt.Errorf("%v: unexpected rune '.' at 78", quad.ErrInvalid), }, // Example quad from issue #140. { message: "parse incomplete quad", input: "\t\t.", expect: quad.Quad{ Subject: "", Predicate: "", Object: "", Label: "", }, err: fmt.Errorf("%v: unexpected rune '\"' at 99", quad.ErrInvalid), }, } func TestParse(t *testing.T) { for _, test := range testNQuads { got, err := Parse(test.input) if err != test.err && (err != nil && err.Error() != test.err.Error()) { t.Errorf("Unexpected error when %s: got:%v expect:%v", test.message, err, test.err) } if !reflect.DeepEqual(got, test.expect) { t.Errorf("Failed to %s, %q, got:%#v expect:%#v", test.message, test.input, got, test.expect) } } } // This is a sample taken from 30kmoviedata.nq. // It has intentional defects: // The second comment is inset one space and // the second line after that comment is blank. var document = `# first 10 lines of 30kmoviedata.nq _:100000 . _:100001 . _:100002 . _:100003 . _:100004 . _:100005 . _:100006 . _:100007 . _:100008 . _:100009 . #last ten lines of 30kmoviedata.nq "Bill Fishman" . . "Matthew J. Evans" . . "Nina Bonherry" . . "Bill Roberts" . . "Christopher Ashley" . . ` func TestDecoder(t *testing.T) { dec := NewDecoder(strings.NewReader(document)) var n int for { q, err := dec.Unmarshal() if err != nil { if err != io.EOF { t.Fatalf("Failed to read document: %v", err) } break } if q.Subject == "" || q.Predicate == "" || q.Object == "" { t.Errorf("Unexpected quad, got:%v", q) } n++ } if n != 20 { t.Errorf("Unexpected number of quads read, got:%d expect:20", n) } } func TestRDFWorkingGroupSuit(t *testing.T) { // These tests erroneously pass because the parser does not // perform semantic testing on the URI in the IRIRef as required // by the specification. So, we skip them. skip := map[string]bool{ // N-Triples. "nt-syntax-bad-uri-06.nt": true, "nt-syntax-bad-uri-07.nt": true, "nt-syntax-bad-uri-08.nt": true, "nt-syntax-bad-uri-09.nt": true, // N-Quads. "nq-syntax-bad-uri-01.nq": true, "nt-syntax-bad-uri-06.nq": true, "nt-syntax-bad-uri-07.nq": true, "nt-syntax-bad-uri-08.nq": true, "nt-syntax-bad-uri-09.nq": true, } for _, file := range []string{ filepath.Join("..", "ntriple_tests.tar.gz"), filepath.Join("..", "nquad_tests.tar.gz"), } { suite, err := os.Open(file) if err != nil { t.Fatalf("Failed to open test suite in %q: %v", file, err) } defer suite.Close() r, err := gzip.NewReader(suite) if err != nil { t.Fatalf("Failed to uncompress test suite in %q: %v", file, err) } tr := tar.NewReader(r) for { h, err := tr.Next() if err != nil { if err == io.EOF { break } t.Fatalf("Unexpected error while reading suite archive: %v", err) } h.Name = filepath.Base(h.Name) if (filepath.Ext(h.Name) != ".nt" && filepath.Ext(h.Name) != ".nq") || skip[h.Name] { continue } isBad := strings.Contains(h.Name, "bad") dec := NewDecoder(tr) for { _, err := dec.Unmarshal() if err == io.EOF { break } got := err == nil if got == isBad { t.Errorf("Unexpected error return for test suite item %q, got: %v", h.Name, err) } } } } } var escapeSequenceTests = []struct { input string expect string }{ {input: `\t`, expect: "\t"}, {input: `\b`, expect: "\b"}, {input: `\n`, expect: "\n"}, {input: `\r`, expect: "\r"}, {input: `\f`, expect: "\f"}, {input: `\\`, expect: "\\"}, {input: `\u00b7`, expect: "·"}, {input: `\U000000b7`, expect: "·"}, {input: `\t\u00b7`, expect: "\t·"}, {input: `\b\U000000b7`, expect: "\b·"}, {input: `\u00b7\n`, expect: "·\n"}, {input: `\U000000b7\r`, expect: "·\r"}, {input: `\u00b7\f\U000000b7`, expect: "·\f·"}, {input: `\U000000b7\\\u00b7`, expect: "·\\·"}, } func TestUnescape(t *testing.T) { for _, test := range escapeSequenceTests { got := unEscape([]rune(test.input), true) if got != test.expect { t.Errorf("Failed to properly unescape %q, got:%q expect:%q", test.input, got, test.expect) } } } var result quad.Quad func BenchmarkParser(b *testing.B) { for n := 0; n < b.N; n++ { result, _ = Parse(" \"object of some real\\tlength\"@en . # comment") } }