This makes almost no changes to the source, but touches almost every file. It also fixes an error in the gremlin test code.
196 lines
3.9 KiB
Go
196 lines
3.9 KiB
Go
// Copyright 2014 The Cayley Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package nquads
|
|
|
|
import (
|
|
"bufio"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/barakmich/glog"
|
|
|
|
"github.com/google/cayley/graph"
|
|
)
|
|
|
|
// isWhitespace reports whether s is one of the intra-line whitespace bytes
// recognised by this parser: tab, carriage return or space. A newline is
// deliberately not in the set.
func isWhitespace(s uint8) bool {
	switch s {
	case ' ', '\t', '\r':
		return true
	}
	return false
}
|
|
func ParseLineToTriple(str string) *graph.Triple {
|
|
// Skip leading whitespace.
|
|
str = skipWhitespace(str)
|
|
// Check for a comment
|
|
if str != "" && str[0] == '#' {
|
|
return nil
|
|
}
|
|
sub, remainder := getTripleComponent(str)
|
|
if sub == nil {
|
|
return nil
|
|
}
|
|
str = skipWhitespace(remainder)
|
|
pred, remainder := getTripleComponent(str)
|
|
if pred == nil {
|
|
return nil
|
|
}
|
|
str = skipWhitespace(remainder)
|
|
obj, remainder := getTripleComponent(str)
|
|
if obj == nil {
|
|
return nil
|
|
}
|
|
str = skipWhitespace(remainder)
|
|
prov_ptr, remainder := getTripleComponent(str)
|
|
var prov string
|
|
if prov_ptr == nil {
|
|
prov = ""
|
|
} else {
|
|
prov = *prov_ptr
|
|
}
|
|
str = skipWhitespace(remainder)
|
|
if str != "" && str[0] == '.' {
|
|
return graph.MakeTriple(*sub, *pred, *obj, prov)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func skipWhitespace(str string) string {
|
|
i := 0
|
|
for i < len(str) && isWhitespace(str[i]) {
|
|
i += 1
|
|
}
|
|
return str[i:]
|
|
}
|
|
|
|
func getTripleComponent(str string) (*string, string) {
|
|
if len(str) == 0 {
|
|
return nil, str
|
|
}
|
|
if str[0] == '<' {
|
|
return getUriPart(str[1:])
|
|
} else if str[0] == '"' {
|
|
return getQuotedPart(str[1:])
|
|
} else if str[0] == '.' {
|
|
return nil, str
|
|
} else {
|
|
// Technically not part of the spec. But we do it anyway for convenience.
|
|
return getUnquotedPart(str)
|
|
}
|
|
}
|
|
|
|
// getUriPart reads the text up to the first '>' in str; str is expected to
// start just after the opening '<'.
//
// It returns that text and the remainder following the '>'. If str contains
// no '>', it returns nil and str unchanged.
func getUriPart(str string) (*string, string) {
	for pos := 0; pos < len(str); pos++ {
		if str[pos] == '>' {
			uri := str[:pos]
			return &uri, str[pos+1:]
		}
	}
	return nil, str
}
|
|
|
|
func getQuotedPart(str string) (*string, string) {
|
|
i := 0
|
|
start := 0
|
|
out := ""
|
|
for i < len(str) && str[i] != '"' {
|
|
if str[i] == '\\' {
|
|
out += str[start:i]
|
|
switch str[i+1] {
|
|
case '\\':
|
|
out += "\\"
|
|
case 'r':
|
|
out += "\r"
|
|
case 'n':
|
|
out += "\n"
|
|
case 't':
|
|
out += "\t"
|
|
case '"':
|
|
out += "\""
|
|
default:
|
|
return nil, str
|
|
}
|
|
i += 2
|
|
start = i
|
|
continue
|
|
}
|
|
i += 1
|
|
}
|
|
if i == len(str) {
|
|
return nil, str
|
|
}
|
|
out += str[start:i]
|
|
i += 1
|
|
var remainder string
|
|
if strings.HasPrefix(str[i:], "^^<") {
|
|
// Ignore type, for now
|
|
_, remainder = getUriPart(str[i+3:])
|
|
} else if strings.HasPrefix(str[i:], "@") {
|
|
_, remainder = getUnquotedPart(str[i+1:])
|
|
} else {
|
|
remainder = str[i:]
|
|
}
|
|
|
|
return &out, remainder
|
|
}
|
|
|
|
func getUnquotedPart(str string) (*string, string) {
|
|
i := 0
|
|
initStr := str
|
|
out := ""
|
|
start := 0
|
|
for i < len(str) && !isWhitespace(str[i]) {
|
|
if str[i] == '"' {
|
|
part, remainder := getQuotedPart(str[i+1:])
|
|
if part == nil {
|
|
return part, initStr
|
|
}
|
|
out += str[start:i]
|
|
str = remainder
|
|
i = 0
|
|
start = 0
|
|
out += *part
|
|
}
|
|
i += 1
|
|
}
|
|
out += str[start:i]
|
|
return &out, str[i:]
|
|
}
|
|
|
|
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) {
|
|
bf := bufio.NewReader(reader)
|
|
|
|
nTriples := 0
|
|
line := ""
|
|
for {
|
|
l, pre, err := bf.ReadLine()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
glog.Fatalln("Something bad happened while reading file " + err.Error())
|
|
}
|
|
line += string(l)
|
|
if pre {
|
|
continue
|
|
}
|
|
triple := ParseLineToTriple(line)
|
|
line = ""
|
|
if triple != nil {
|
|
nTriples++
|
|
c <- triple
|
|
}
|
|
}
|
|
glog.Infoln("Read", nTriples, "triples")
|
|
close(c)
|
|
}
|