Generate code for parser

The G2 code generation style was chosen after benchmarking.

style	benchmark           old ns/op     new ns/op     delta
T0		BenchmarkParser     672           5631          +737.95%
T1		BenchmarkParser     672           5579          +730.21%
G0		BenchmarkParser     672           4049          +502.53%
G1		BenchmarkParser     672           3868          +475.60%
G2		BenchmarkParser     672           3543          +427.23%

F0 and F1 create massive Go source (6.0M) and so were not tested.

Invalid tests have been removed; additional tests for invalid input will be
added later.
This commit is contained in:
kortschak 2014-07-24 21:25:14 +09:30
parent 92d50bb9f7
commit 22bad1701f
3 changed files with 3316 additions and 205 deletions

View file

@ -19,7 +19,6 @@ import (
"errors"
"fmt"
"io"
"strings"
"github.com/google/cayley/graph"
)
@ -32,143 +31,11 @@ var (
)
func Parse(str string) (*graph.Triple, error) {
// Skip leading whitespace.
str = trimSpace(str)
// Check for a comment
if str != "" && str[0] == '#' {
return nil, nil
t, err := parse([]rune(str))
if err != nil {
return nil, err
}
sub, remainder := getTripleComponent(str)
if sub == "" {
return nil, ErrAbsentSubject
}
str = trimSpace(remainder)
pred, remainder := getTripleComponent(str)
if pred == "" {
return nil, ErrAbsentPredicate
}
str = trimSpace(remainder)
obj, remainder := getTripleComponent(str)
if obj == "" {
return nil, ErrAbsentObject
}
str = trimSpace(remainder)
prov, remainder := getTripleComponent(str)
str = trimSpace(remainder)
if str != "" && str[0] == '.' {
return &graph.Triple{sub, pred, obj, prov}, nil
}
return nil, ErrUnterminated
}
// isSpace reports whether s is a space, a horizontal tab or a carriage
// return. A newline is not treated as whitespace.
func isSpace(s uint8) bool {
	switch s {
	case ' ', '\t', '\r':
		return true
	default:
		return false
	}
}
// trimSpace returns str with every leading byte for which isSpace is true
// removed. Trailing whitespace is left in place.
func trimSpace(str string) string {
	for len(str) > 0 && isSpace(str[0]) {
		str = str[1:]
	}
	return str
}
// getTripleComponent reads one component from the front of str and returns
// it as head, with the unconsumed remainder as tail. The first byte selects
// the reader:
//
//	'<'  the text after it is handed to getUriPart
//	'"'  the text after it is handed to getQuotedPart
//	'.'  no component is read; head is empty and tail is str
//
// Any other first byte passes the whole of str to getUnquotedPart. An empty
// str yields an empty head and an empty tail.
func getTripleComponent(str string) (head, tail string) {
	if str == "" {
		return "", str
	}
	switch str[0] {
	case '<':
		return getUriPart(str[1:])
	case '"':
		return getQuotedPart(str[1:])
	case '.':
		return "", str
	}
	// Technically not part of the spec. But we do it anyway for convenience.
	return getUnquotedPart(str)
}
// getUriPart splits str at the first '>' byte. head is everything before
// it and tail is everything after it; the '>' itself is dropped. str is
// expected to start just after the opening '<'. When str contains no '>',
// head is empty and tail is str unchanged.
func getUriPart(str string) (head, tail string) {
	for pos := 0; pos < len(str); pos++ {
		if str[pos] == '>' {
			return str[:pos], str[pos+1:]
		}
	}
	return "", str
}
// getQuotedPart reads a double-quoted literal whose opening quote has
// already been consumed, so str starts at the first byte of the body.
//
// head is the body with the escapes \\, \r, \n, \t and \" replaced by the
// bytes they denote. tail is the input following the literal: a "^^<...>"
// datatype suffix is skipped with getUriPart, an "@..." suffix is skipped
// with getUnquotedPart, and otherwise tail starts right after the closing
// quote.
//
// A malformed literal yields head == "" and tail == str. That covers a
// missing closing quote, an unrecognised escape, and a backslash that is
// the last byte of the input.
func getQuotedPart(str string) (head, tail string) {
	var (
		buf   strings.Builder
		i     int
		start int
	)
	for i < len(str) && str[i] != '"' {
		if str[i] != '\\' {
			i++
			continue
		}
		// A backslash at the very end has nothing to escape. Reject it
		// here instead of indexing past the end of str.
		if i+1 >= len(str) {
			return "", str
		}
		buf.WriteString(str[start:i])
		switch str[i+1] {
		case '\\':
			buf.WriteByte('\\')
		case 'r':
			buf.WriteByte('\r')
		case 'n':
			buf.WriteByte('\n')
		case 't':
			buf.WriteByte('\t')
		case '"':
			buf.WriteByte('"')
		default:
			return "", str
		}
		i += 2
		start = i
	}
	if i == len(str) {
		// Ran out of input before finding the closing quote.
		return "", str
	}
	buf.WriteString(str[start:i])
	head = buf.String()

	i++ // Step over the closing quote.
	switch {
	case strings.HasPrefix(str[i:], "^^<"):
		// Ignore type, for now
		_, tail = getUriPart(str[i+3:])
	case i < len(str) && str[i] == '@':
		// The bounds check matters when the closing quote is the last
		// byte of str: there is then no byte at i to inspect.
		_, tail = getUnquotedPart(str[i+1:])
	default:
		tail = str[i:]
	}
	return head, tail
}
func getUnquotedPart(str string) (head, tail string) {
var (
i int
initStr = str
start int
)
for i < len(str) && !isSpace(str[i]) {
if str[i] == '"' {
part, remainder := getQuotedPart(str[i+1:])
if part == "" {
return part, initStr
}
head += str[start:i]
str = remainder
i = 0
start = 0
head += part
}
i += 1
}
head += str[start:i]
return head, str[i:]
return &t, nil
}
type Decoder struct {

View file

@ -27,93 +27,330 @@ var testNTriples = []struct {
expect *graph.Triple
err error
}{
// NTriple tests.
// Tests taken from http://www.w3.org/TR/n-quads/ and http://www.w3.org/TR/n-triples/.
// N-Triples example 1.
{
message: "not parse invalid triples",
input: "invalid",
expect: nil,
err: ErrAbsentPredicate,
message: "parse triple with commment",
input: `<http://one.example/subject1> <http://one.example/predicate1> <http://one.example/object1> . # comments here`,
expect: &graph.Triple{
Subject: "<http://one.example/subject1>",
Predicate: "<http://one.example/predicate1>",
Object: "<http://one.example/object1>",
Provenance: "",
},
err: nil,
},
{
message: "invalid internal quote",
input: `":103032" "/film/performance/character" "Walter "Teacher" Cole" .`,
expect: nil,
err: ErrUnterminated,
message: "parse triple with blank subject node, literal object and no comment (1)",
input: `_:subject1 <http://an.example/predicate1> "object1" .`,
expect: &graph.Triple{
Subject: "_:subject1",
Predicate: "<http://an.example/predicate1>",
Object: `"object1"`,
Provenance: "",
},
err: nil,
},
{
message: "not parse comments",
input: "# nominally valid triple .",
expect: nil,
err: nil,
},
{
message: "parse simple triples",
input: "this is valid .",
expect: &graph.Triple{"this", "is", "valid", ""},
},
{
message: "parse quoted triples",
input: `this is "valid too" .`,
expect: &graph.Triple{"this", "is", "valid too", ""},
},
{
message: "parse escaped quoted triples",
input: `he said "\"That's all folks\"" .`,
expect: &graph.Triple{"he", "said", `"That's all folks"`, ""},
},
{
message: "parse an example real triple",
input: `":/guid/9202a8c04000641f80000000010c843c" "name" "George Morris" .`,
expect: &graph.Triple{":/guid/9202a8c04000641f80000000010c843c", "name", "George Morris", ""},
},
{
message: "parse a pathologically spaced triple",
input: "foo is \"\\tA big tough\\r\\nDeal\\\\\" .",
expect: &graph.Triple{"foo", "is", "\tA big tough\r\nDeal\\", ""},
message: "parse triple with blank subject node, literal object and no comment (2)",
input: `_:subject2 <http://an.example/predicate2> "object2" .`,
expect: &graph.Triple{
Subject: "_:subject2",
Predicate: "<http://an.example/predicate2>",
Object: `"object2"`,
Provenance: "",
},
err: nil,
},
// NQuad tests.
// N-Triples example 2.
{
message: "parse a simple quad",
input: "this is valid quad .",
expect: &graph.Triple{"this", "is", "valid", "quad"},
},
{
message: "parse a quoted quad",
input: `this is valid "quad thing" .`,
expect: &graph.Triple{"this", "is", "valid", "quad thing"},
},
{
message: "parse crazy escaped quads",
input: `"\"this" "\"is" "\"valid" "\"quad thing".`,
expect: &graph.Triple{`"this`, `"is`, `"valid`, `"quad thing`},
message: "parse triple with three IRIREFs",
input: `<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green-goblin> .`,
expect: &graph.Triple{
Subject: "<http://example.org/#spiderman>",
Predicate: "<http://www.perceive.net/schemas/relationship/enemyOf>",
Object: "<http://example.org/#green-goblin>",
Provenance: "",
},
err: nil,
},
// NTriple official tests.
// N-Triples example 3.
{
message: "handle simple case with comments",
input: "<http://example/s> <http://example/p> <http://example/o> . # comment",
expect: &graph.Triple{"http://example/s", "http://example/p", "http://example/o", ""},
message: "parse triple with blank node labelled subject and object and IRIREF predicate (1)",
input: `_:alice <http://xmlns.com/foaf/0.1/knows> _:bob .`,
expect: &graph.Triple{
Subject: "_:alice",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "_:bob",
Provenance: "",
},
err: nil,
},
{
message: "handle simple case with comments",
input: "<http://example/s> <http://example/p> _:o . # comment",
expect: &graph.Triple{"http://example/s", "http://example/p", "_:o", ""},
message: "parse triple with blank node labelled subject and object and IRIREF predicate (2)",
input: `_:bob <http://xmlns.com/foaf/0.1/knows> _:alice .`,
expect: &graph.Triple{
Subject: "_:bob",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "_:alice",
Provenance: "",
},
err: nil,
},
// N-Quads example 1.
{
message: "parse quad with commment",
input: `<http://one.example/subject1> <http://one.example/predicate1> <http://one.example/object1> <http://example.org/graph3> . # comments here`,
expect: &graph.Triple{
Subject: "<http://one.example/subject1>",
Predicate: "<http://one.example/predicate1>",
Object: "<http://one.example/object1>",
Provenance: "<http://example.org/graph3>",
},
err: nil,
},
{
message: "handle simple case with comments",
input: "<http://example/s> <http://example/p> \"o\" . # comment",
expect: &graph.Triple{"http://example/s", "http://example/p", "o", ""},
message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (1)",
input: `_:subject1 <http://an.example/predicate1> "object1" <http://example.org/graph1> .`,
expect: &graph.Triple{
Subject: "_:subject1",
Predicate: "<http://an.example/predicate1>",
Object: `"object1"`,
Provenance: "<http://example.org/graph1>",
},
err: nil,
},
{
message: "handle simple case with comments",
input: "<http://example/s> <http://example/p> \"o\"^^<http://example/dt> . # comment",
expect: &graph.Triple{"http://example/s", "http://example/p", "o", ""},
message: "parse quad with blank subject node, literal object, IRIREF predicate and label, and no comment (2)",
input: `_:subject2 <http://an.example/predicate2> "object2" <http://example.org/graph5> .`,
expect: &graph.Triple{
Subject: "_:subject2",
Predicate: "<http://an.example/predicate2>",
Object: `"object2"`,
Provenance: "<http://example.org/graph5>",
},
err: nil,
},
// N-Quads example 2.
{
message: "parse quad with all IRIREF parts",
input: `<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green-goblin> <http://example.org/graphs/spiderman> .`,
expect: &graph.Triple{
Subject: "<http://example.org/#spiderman>",
Predicate: "<http://www.perceive.net/schemas/relationship/enemyOf>",
Object: "<http://example.org/#green-goblin>",
Provenance: "<http://example.org/graphs/spiderman>",
},
err: nil,
},
// N-Quads example 3.
{
message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (1)",
input: `_:alice <http://xmlns.com/foaf/0.1/knows> _:bob <http://example.org/graphs/john> .`,
expect: &graph.Triple{
Subject: "_:alice",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "_:bob",
Provenance: "<http://example.org/graphs/john>",
},
err: nil,
},
{
message: "handle simple case with comments",
input: "<http://example/s> <http://example/p> \"o\"@en . # comment",
expect: &graph.Triple{"http://example/s", "http://example/p", "o", ""},
message: "parse quad with blank node labelled subject and object and IRIREF predicate and label (2)",
input: `_:bob <http://xmlns.com/foaf/0.1/knows> _:alice <http://example.org/graphs/james> .`,
expect: &graph.Triple{
Subject: "_:bob",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "_:alice",
Provenance: "<http://example.org/graphs/james>",
},
err: nil,
},
// N-Triples tests.
{
message: "parse triple with all IRIREF parts",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "<http://xmlns.com/foaf/0.1/Person>",
Provenance: "",
},
err: nil,
},
{
message: "parse triple with all IRIREF parts",
input: `<http://example.org/bob#me> <http://xmlns.com/foaf/0.1/knows> <http://example.org/alice#me> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "<http://example.org/alice#me>",
Provenance: "",
},
err: nil,
},
{
message: "parse triple with IRIREF schema on literal object",
input: `<http://example.org/bob#me> <http://schema.org/birthDate> "1990-07-04"^^<http://www.w3.org/2001/XMLSchema#date> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://schema.org/birthDate>",
Object: `"1990-07-04"^^<http://www.w3.org/2001/XMLSchema#date>`,
Provenance: "",
},
err: nil,
},
{
message: "parse commented IRIREF in triple",
input: `<http://example.org/bob#me> <http://xmlns.com/foaf/0.1/topic_interest> <http://www.wikidata.org/entity/Q12418> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://xmlns.com/foaf/0.1/topic_interest>",
Object: "<http://www.wikidata.org/entity/Q12418>",
Provenance: "",
},
err: nil,
},
{
message: "parse triple with literal subject",
input: `<http://www.wikidata.org/entity/Q12418> <http://purl.org/dc/terms/title> "Mona Lisa" .`,
expect: &graph.Triple{
Subject: "<http://www.wikidata.org/entity/Q12418>",
Predicate: "<http://purl.org/dc/terms/title>",
Object: `"Mona Lisa"`,
Provenance: "",
},
err: nil,
},
{
message: "parse triple with all IRIREF parts (1)",
input: `<http://www.wikidata.org/entity/Q12418> <http://purl.org/dc/terms/creator> <http://dbpedia.org/resource/Leonardo_da_Vinci> .`,
expect: &graph.Triple{
Subject: "<http://www.wikidata.org/entity/Q12418>",
Predicate: "<http://purl.org/dc/terms/creator>",
Object: "<http://dbpedia.org/resource/Leonardo_da_Vinci>",
Provenance: "",
},
err: nil,
},
{
message: "parse triple with all IRIREF parts (2)",
input: `<http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619> <http://purl.org/dc/terms/subject> <http://www.wikidata.org/entity/Q12418> .`,
expect: &graph.Triple{
Subject: "<http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619>",
Predicate: "<http://purl.org/dc/terms/subject>",
Object: "<http://www.wikidata.org/entity/Q12418>",
Provenance: "",
},
err: nil,
},
// N-Quads tests.
{
message: "parse commented IRIREF in quad (1)",
input: `<http://example.org/bob#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/bob> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
Object: "<http://xmlns.com/foaf/0.1/Person>",
Provenance: "<http://example.org/bob>",
},
err: nil,
},
{
message: "parse quad with all IRIREF parts",
input: `<http://example.org/bob#me> <http://xmlns.com/foaf/0.1/knows> <http://example.org/alice#me> <http://example.org/bob> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://xmlns.com/foaf/0.1/knows>",
Object: "<http://example.org/alice#me>",
Provenance: "<http://example.org/bob>",
},
err: nil,
},
{
message: "parse quad with IRIREF schema on literal object",
input: `<http://example.org/bob#me> <http://schema.org/birthDate> "1990-07-04"^^<http://www.w3.org/2001/XMLSchema#date> <http://example.org/bob> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://schema.org/birthDate>",
Object: `"1990-07-04"^^<http://www.w3.org/2001/XMLSchema#date>`,
Provenance: "<http://example.org/bob>",
},
err: nil,
},
{
message: "parse commented IRIREF in quad (2)",
input: `<http://example.org/bob#me> <http://xmlns.com/foaf/0.1/topic_interest> <http://www.wikidata.org/entity/Q12418> <http://example.org/bob> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob#me>",
Predicate: "<http://xmlns.com/foaf/0.1/topic_interest>",
Object: "<http://www.wikidata.org/entity/Q12418>",
Provenance: "<http://example.org/bob>",
},
err: nil,
},
{
message: "parse literal object and colon qualified label in quad",
input: `<http://www.wikidata.org/entity/Q12418> <http://purl.org/dc/terms/title> "Mona Lisa" <https://www.wikidata.org/wiki/Special:EntityData/Q12418> .`,
expect: &graph.Triple{
Subject: "<http://www.wikidata.org/entity/Q12418>",
Predicate: "<http://purl.org/dc/terms/title>",
Object: `"Mona Lisa"`,
Provenance: "<https://www.wikidata.org/wiki/Special:EntityData/Q12418>",
},
err: nil,
},
{
message: "parse all IRIREF parts with colon qualified label in quad (1)",
input: `<http://www.wikidata.org/entity/Q12418> <http://purl.org/dc/terms/creator> <http://dbpedia.org/resource/Leonardo_da_Vinci> <https://www.wikidata.org/wiki/Special:EntityData/Q12418> .`,
expect: &graph.Triple{
Subject: "<http://www.wikidata.org/entity/Q12418>",
Predicate: "<http://purl.org/dc/terms/creator>",
Object: "<http://dbpedia.org/resource/Leonardo_da_Vinci>",
Provenance: "<https://www.wikidata.org/wiki/Special:EntityData/Q12418>",
},
err: nil,
},
{
message: "parse all IRIREF parts with colon qualified label in quad (2)",
input: `<http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619> <http://purl.org/dc/terms/subject> <http://www.wikidata.org/entity/Q12418> <https://www.wikidata.org/wiki/Special:EntityData/Q12418> .`,
expect: &graph.Triple{
Subject: "<http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619>",
Predicate: "<http://purl.org/dc/terms/subject>",
Object: "<http://www.wikidata.org/entity/Q12418>",
Provenance: "<https://www.wikidata.org/wiki/Special:EntityData/Q12418>",
},
err: nil,
},
{
message: "parse all IRIREF parts (quad section - 1)",
input: `<http://example.org/bob> <http://purl.org/dc/terms/publisher> <http://example.org> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob>",
Predicate: "<http://purl.org/dc/terms/publisher>",
Object: "<http://example.org>",
Provenance: "",
},
err: nil,
},
{
message: "parse all IRIREF parts (quad section - 2)",
input: `<http://example.org/bob> <http://purl.org/dc/terms/rights> <http://creativecommons.org/licenses/by/3.0/> .`,
expect: &graph.Triple{
Subject: "<http://example.org/bob>",
Predicate: "<http://purl.org/dc/terms/rights>",
Object: "<http://creativecommons.org/licenses/by/3.0/>",
Provenance: "",
},
err: nil,
},
}

3007
nquads/parse.go Normal file

File diff suppressed because it is too large Load diff