Reorganize so go get will work
This makes almost no changes to the source, but touches almost every file. Also fixes an error in the gremlin test code.
This commit is contained in:
parent
e46a5bbe4a
commit
e0df752618
130 changed files with 8766 additions and 10167 deletions
196
nquads/nquads.go
Normal file
196
nquads/nquads.go
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nquads
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
)
|
||||
|
||||
// isWhitespace reports whether s is one of the intra-line whitespace bytes
// the parser skips between components: space, tab or carriage return.
// A newline is deliberately not included.
func isWhitespace(s uint8) bool {
	switch s {
	case ' ', '\t', '\r':
		return true
	}
	return false
}
|
||||
func ParseLineToTriple(str string) *graph.Triple {
|
||||
// Skip leading whitespace.
|
||||
str = skipWhitespace(str)
|
||||
// Check for a comment
|
||||
if str != "" && str[0] == '#' {
|
||||
return nil
|
||||
}
|
||||
sub, remainder := getTripleComponent(str)
|
||||
if sub == nil {
|
||||
return nil
|
||||
}
|
||||
str = skipWhitespace(remainder)
|
||||
pred, remainder := getTripleComponent(str)
|
||||
if pred == nil {
|
||||
return nil
|
||||
}
|
||||
str = skipWhitespace(remainder)
|
||||
obj, remainder := getTripleComponent(str)
|
||||
if obj == nil {
|
||||
return nil
|
||||
}
|
||||
str = skipWhitespace(remainder)
|
||||
prov_ptr, remainder := getTripleComponent(str)
|
||||
var prov string
|
||||
if prov_ptr == nil {
|
||||
prov = ""
|
||||
} else {
|
||||
prov = *prov_ptr
|
||||
}
|
||||
str = skipWhitespace(remainder)
|
||||
if str != "" && str[0] == '.' {
|
||||
return graph.MakeTriple(*sub, *pred, *obj, prov)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func skipWhitespace(str string) string {
|
||||
i := 0
|
||||
for i < len(str) && isWhitespace(str[i]) {
|
||||
i += 1
|
||||
}
|
||||
return str[i:]
|
||||
}
|
||||
|
||||
func getTripleComponent(str string) (*string, string) {
|
||||
if len(str) == 0 {
|
||||
return nil, str
|
||||
}
|
||||
if str[0] == '<' {
|
||||
return getUriPart(str[1:])
|
||||
} else if str[0] == '"' {
|
||||
return getQuotedPart(str[1:])
|
||||
} else if str[0] == '.' {
|
||||
return nil, str
|
||||
} else {
|
||||
// Technically not part of the spec. But we do it anyway for convenience.
|
||||
return getUnquotedPart(str)
|
||||
}
|
||||
}
|
||||
|
||||
// getUriPart reads a URI whose opening '<' has already been consumed.
//
// It returns the text up to (not including) the first '>' and the remainder
// of str after that '>'. When str contains no '>', it returns nil and str
// unchanged.
func getUriPart(str string) (*string, string) {
	end := strings.IndexByte(str, '>')
	if end < 0 {
		return nil, str
	}
	uri := str[:end]
	return &uri, str[end+1:]
}
|
||||
|
||||
func getQuotedPart(str string) (*string, string) {
|
||||
i := 0
|
||||
start := 0
|
||||
out := ""
|
||||
for i < len(str) && str[i] != '"' {
|
||||
if str[i] == '\\' {
|
||||
out += str[start:i]
|
||||
switch str[i+1] {
|
||||
case '\\':
|
||||
out += "\\"
|
||||
case 'r':
|
||||
out += "\r"
|
||||
case 'n':
|
||||
out += "\n"
|
||||
case 't':
|
||||
out += "\t"
|
||||
case '"':
|
||||
out += "\""
|
||||
default:
|
||||
return nil, str
|
||||
}
|
||||
i += 2
|
||||
start = i
|
||||
continue
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
if i == len(str) {
|
||||
return nil, str
|
||||
}
|
||||
out += str[start:i]
|
||||
i += 1
|
||||
var remainder string
|
||||
if strings.HasPrefix(str[i:], "^^<") {
|
||||
// Ignore type, for now
|
||||
_, remainder = getUriPart(str[i+3:])
|
||||
} else if strings.HasPrefix(str[i:], "@") {
|
||||
_, remainder = getUnquotedPart(str[i+1:])
|
||||
} else {
|
||||
remainder = str[i:]
|
||||
}
|
||||
|
||||
return &out, remainder
|
||||
}
|
||||
|
||||
func getUnquotedPart(str string) (*string, string) {
|
||||
i := 0
|
||||
initStr := str
|
||||
out := ""
|
||||
start := 0
|
||||
for i < len(str) && !isWhitespace(str[i]) {
|
||||
if str[i] == '"' {
|
||||
part, remainder := getQuotedPart(str[i+1:])
|
||||
if part == nil {
|
||||
return part, initStr
|
||||
}
|
||||
out += str[start:i]
|
||||
str = remainder
|
||||
i = 0
|
||||
start = 0
|
||||
out += *part
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
out += str[start:i]
|
||||
return &out, str[i:]
|
||||
}
|
||||
|
||||
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) {
|
||||
bf := bufio.NewReader(reader)
|
||||
|
||||
nTriples := 0
|
||||
line := ""
|
||||
for {
|
||||
l, pre, err := bf.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
glog.Fatalln("Something bad happened while reading file " + err.Error())
|
||||
}
|
||||
line += string(l)
|
||||
if pre {
|
||||
continue
|
||||
}
|
||||
triple := ParseLineToTriple(line)
|
||||
line = ""
|
||||
if triple != nil {
|
||||
nTriples++
|
||||
c <- triple
|
||||
}
|
||||
}
|
||||
glog.Infoln("Read", nTriples, "triples")
|
||||
close(c)
|
||||
}
|
||||
131
nquads/nquads_test.go
Normal file
131
nquads/nquads_test.go
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nquads
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
)
|
||||
|
||||
func TestParsingNTriples(t *testing.T) {
|
||||
Convey("When parsing", t, func() {
|
||||
Convey("It should not parse invalid triples", func() {
|
||||
x := ParseLineToTriple("invalid")
|
||||
So(x, ShouldBeNil)
|
||||
})
|
||||
Convey("It should not parse comments", func() {
|
||||
x := ParseLineToTriple("# nominally valid triple .")
|
||||
So(x, ShouldBeNil)
|
||||
})
|
||||
Convey("It should parse simple triples", func() {
|
||||
x := ParseLineToTriple("this is valid .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Sub, ShouldEqual, "this")
|
||||
})
|
||||
Convey("It should parse quoted triples", func() {
|
||||
x := ParseLineToTriple("this is \"valid too\" .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "valid too")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
})
|
||||
Convey("It should parse escaped quoted triples", func() {
|
||||
x := ParseLineToTriple("he said \"\\\"That's all folks\\\"\" .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "\"That's all folks\"")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
})
|
||||
|
||||
Convey("It should parse an example real triple", func() {
|
||||
x := ParseLineToTriple("\":/guid/9202a8c04000641f80000000010c843c\" \"name\" \"George Morris\" .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "George Morris")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
})
|
||||
|
||||
Convey("It should parse a pathologically spaced triple", func() {
|
||||
x := ParseLineToTriple("foo is \"\\tA big tough\\r\\nDeal\\\\\" .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "\tA big tough\r\nDeal\\")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
})
|
||||
|
||||
Convey("It should parse a simple quad", func() {
|
||||
x := ParseLineToTriple("this is valid quad .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "valid")
|
||||
So(x.Provenance, ShouldEqual, "quad")
|
||||
})
|
||||
|
||||
Convey("It should parse a quoted quad", func() {
|
||||
x := ParseLineToTriple("this is valid \"quad thing\" .")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "valid")
|
||||
So(x.Provenance, ShouldEqual, "quad thing")
|
||||
})
|
||||
|
||||
Convey("It should parse crazy escaped quads", func() {
|
||||
x := ParseLineToTriple("\"\\\"this\" \"\\\"is\" \"\\\"valid\" \"\\\"quad thing\".")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Sub, ShouldEqual, "\"this")
|
||||
So(x.Pred, ShouldEqual, "\"is")
|
||||
So(x.Obj, ShouldEqual, "\"valid")
|
||||
So(x.Provenance, ShouldEqual, "\"quad thing")
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestParsingNTriplesOfficial(t *testing.T) {
|
||||
Convey("When using some public test cases...", t, func() {
|
||||
Convey("It should handle some simple cases with comments", func() {
|
||||
var x *graph.Triple
|
||||
x = ParseLineToTriple("<http://example/s> <http://example/p> <http://example/o> . # comment")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Sub, ShouldEqual, "http://example/s")
|
||||
So(x.Pred, ShouldEqual, "http://example/p")
|
||||
So(x.Obj, ShouldEqual, "http://example/o")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
x = ParseLineToTriple("<http://example/s> <http://example/p> _:o . # comment")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Sub, ShouldEqual, "http://example/s")
|
||||
So(x.Pred, ShouldEqual, "http://example/p")
|
||||
So(x.Obj, ShouldEqual, "_:o")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
x = ParseLineToTriple("<http://example/s> <http://example/p> \"o\" . # comment")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "o")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
x = ParseLineToTriple("<http://example/s> <http://example/p> \"o\"^^<http://example/dt> . # comment")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "o")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
x = ParseLineToTriple("<http://example/s> <http://example/p> \"o\"@en . # comment")
|
||||
So(x, ShouldNotBeNil)
|
||||
So(x.Obj, ShouldEqual, "o")
|
||||
So(x.Provenance, ShouldEqual, "")
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkParser(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
x := ParseLineToTriple("<http://example/s> <http://example/p> \"object of some real\\tlength\"@en . # comment")
|
||||
if x.Obj != "object of some real\tlength" {
|
||||
b.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue