Generate code for parser
G2 code generation used after benchmarking.

style  benchmark        old ns/op  new ns/op  delta
T0     BenchmarkParser  672        5631       +737.95%
T1     BenchmarkParser  672        5579       +730.21%
G0     BenchmarkParser  672        4049       +502.53%
G1     BenchmarkParser  672        3868       +475.60%
G2     BenchmarkParser  672        3543       +427.23%

F0 and F1 create massive Go source (6.0M) and so were not tested.

Invalid tests removed; additional tests for invalid input to be added later.
This commit is contained in:
parent
92d50bb9f7
commit
22bad1701f
3 changed files with 3316 additions and 205 deletions
141
nquads/nquads.go
141
nquads/nquads.go
|
|
@ -19,7 +19,6 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
)
|
||||
|
|
@ -32,143 +31,11 @@ var (
|
|||
)
|
||||
|
||||
func Parse(str string) (*graph.Triple, error) {
|
||||
// Skip leading whitespace.
|
||||
str = trimSpace(str)
|
||||
// Check for a comment
|
||||
if str != "" && str[0] == '#' {
|
||||
return nil, nil
|
||||
t, err := parse([]rune(str))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sub, remainder := getTripleComponent(str)
|
||||
if sub == "" {
|
||||
return nil, ErrAbsentSubject
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
pred, remainder := getTripleComponent(str)
|
||||
if pred == "" {
|
||||
return nil, ErrAbsentPredicate
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
obj, remainder := getTripleComponent(str)
|
||||
if obj == "" {
|
||||
return nil, ErrAbsentObject
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
prov, remainder := getTripleComponent(str)
|
||||
str = trimSpace(remainder)
|
||||
if str != "" && str[0] == '.' {
|
||||
return &graph.Triple{sub, pred, obj, prov}, nil
|
||||
}
|
||||
return nil, ErrUnterminated
|
||||
}
|
||||
|
||||
func isSpace(s uint8) bool {
|
||||
return s == ' ' || s == '\t' || s == '\r'
|
||||
}
|
||||
|
||||
func trimSpace(str string) string {
|
||||
i := 0
|
||||
for i < len(str) && isSpace(str[i]) {
|
||||
i += 1
|
||||
}
|
||||
return str[i:]
|
||||
}
|
||||
|
||||
func getTripleComponent(str string) (head, tail string) {
|
||||
if len(str) == 0 {
|
||||
return "", str
|
||||
}
|
||||
if str[0] == '<' {
|
||||
return getUriPart(str[1:])
|
||||
} else if str[0] == '"' {
|
||||
return getQuotedPart(str[1:])
|
||||
} else if str[0] == '.' {
|
||||
return "", str
|
||||
} else {
|
||||
// Technically not part of the spec. But we do it anyway for convenience.
|
||||
return getUnquotedPart(str)
|
||||
}
|
||||
}
|
||||
|
||||
func getUriPart(str string) (head, tail string) {
|
||||
i := 0
|
||||
for i < len(str) && str[i] != '>' {
|
||||
i += 1
|
||||
}
|
||||
if i == len(str) {
|
||||
return "", str
|
||||
}
|
||||
head = str[0:i]
|
||||
return head, str[i+1:]
|
||||
}
|
||||
|
||||
func getQuotedPart(str string) (head, tail string) {
|
||||
var (
|
||||
i int
|
||||
start int
|
||||
)
|
||||
for i < len(str) && str[i] != '"' {
|
||||
if str[i] == '\\' {
|
||||
head += str[start:i]
|
||||
switch str[i+1] {
|
||||
case '\\':
|
||||
head += "\\"
|
||||
case 'r':
|
||||
head += "\r"
|
||||
case 'n':
|
||||
head += "\n"
|
||||
case 't':
|
||||
head += "\t"
|
||||
case '"':
|
||||
head += "\""
|
||||
default:
|
||||
return "", str
|
||||
}
|
||||
i += 2
|
||||
start = i
|
||||
continue
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
if i == len(str) {
|
||||
return "", str
|
||||
}
|
||||
head += str[start:i]
|
||||
i += 1
|
||||
switch {
|
||||
case strings.HasPrefix(str[i:], "^^<"):
|
||||
// Ignore type, for now
|
||||
_, tail = getUriPart(str[i+3:])
|
||||
case str[i] == '@':
|
||||
_, tail = getUnquotedPart(str[i+1:])
|
||||
default:
|
||||
tail = str[i:]
|
||||
}
|
||||
|
||||
return head, tail
|
||||
}
|
||||
|
||||
func getUnquotedPart(str string) (head, tail string) {
|
||||
var (
|
||||
i int
|
||||
initStr = str
|
||||
start int
|
||||
)
|
||||
for i < len(str) && !isSpace(str[i]) {
|
||||
if str[i] == '"' {
|
||||
part, remainder := getQuotedPart(str[i+1:])
|
||||
if part == "" {
|
||||
return part, initStr
|
||||
}
|
||||
head += str[start:i]
|
||||
str = remainder
|
||||
i = 0
|
||||
start = 0
|
||||
head += part
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
head += str[start:i]
|
||||
return head, str[i:]
|
||||
return &t, nil
|
||||
}
|
||||
|
||||
type Decoder struct {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue