cayley/nquads/nquads.go
kortschak 1768e593a8 Move iterators into separate package
Also reduce API exposure and use standard library more - and fix bugs I
previously introduced in mongo.
2014-07-01 09:21:32 +09:30

196 lines
3.8 KiB
Go

// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nquads
import (
"bufio"
"io"
"strings"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
)
// isWhitespace reports whether s is one of the intra-line separator bytes
// recognised by this parser: space, horizontal tab or carriage return.
// A newline is deliberately not in the set.
func isWhitespace(s uint8) bool {
	switch s {
	case ' ', '\t', '\r':
		return true
	}
	return false
}
// Parse turns one line of N-Quads text into a triple.
//
// The line must contain, in order, a subject, a predicate and an object,
// optionally followed by a provenance component, and must then be closed by
// a '.' character. Components may be separated by any amount of whitespace.
//
// Parse returns nil when the first non-whitespace character is '#' (a
// comment), when any of the three mandatory components cannot be read, or
// when the terminating '.' is missing.
func Parse(str string) *graph.Triple {
	rest := skipWhitespace(str)
	if len(rest) > 0 && rest[0] == '#' {
		return nil
	}

	// Subject, predicate and object are mandatory and are read the same way.
	var parts [3]string
	for n := range parts {
		component, tail := getTripleComponent(rest)
		if component == nil {
			return nil
		}
		parts[n] = *component
		rest = skipWhitespace(tail)
	}

	// The fourth component is optional; it stays empty when absent.
	provenance := ""
	component, tail := getTripleComponent(rest)
	if component != nil {
		provenance = *component
	}
	rest = skipWhitespace(tail)

	// Without the closing '.' the statement is rejected.
	if rest == "" || rest[0] != '.' {
		return nil
	}
	return &graph.Triple{parts[0], parts[1], parts[2], provenance}
}
// skipWhitespace returns str with every leading byte for which isWhitespace
// is true removed. Bytes after the first non-whitespace byte are untouched.
func skipWhitespace(str string) string {
	for len(str) > 0 && isWhitespace(str[0]) {
		str = str[1:]
	}
	return str
}
// getTripleComponent reads a single component from the front of str and
// returns it together with the unconsumed remainder. The first byte selects
// the parser:
//
//	'<'  URI part (handled by getUriPart)
//	'"'  quoted literal (handled by getQuotedPart)
//	'.'  statement terminator: no component, str is returned unchanged
//	else bare token (handled by getUnquotedPart)
//
// An empty str yields a nil component.
func getTripleComponent(str string) (*string, string) {
	if str == "" {
		return nil, str
	}
	switch str[0] {
	case '<':
		return getUriPart(str[1:])
	case '"':
		return getQuotedPart(str[1:])
	case '.':
		return nil, str
	default:
		// Bare tokens are technically outside the spec, but are accepted
		// for convenience.
		return getUnquotedPart(str)
	}
}
// getUriPart extracts the text up to, but not including, the first '>' in
// str. str is expected to start just after the opening '<'. It returns the
// extracted text and whatever follows the '>'. When str contains no '>' the
// component is nil and str is returned unchanged.
func getUriPart(str string) (*string, string) {
	for pos := 0; pos < len(str); pos++ {
		if str[pos] == '>' {
			uri := str[:pos]
			return &uri, str[pos+1:]
		}
	}
	return nil, str
}
// getQuotedPart parses the body of a double-quoted literal. str must start
// immediately after the opening quote.
//
// The escape sequences \\, \r, \n, \t and \" are decoded. Any other escape
// sequence, a backslash at the very end of the input, or a missing closing
// quote makes the literal invalid, in which case the component is nil and
// str is returned unchanged.
//
// After the closing quote a datatype suffix ("^^<...>") or a language tag
// ("@...") is consumed and discarded. The second return value is the text
// that follows the literal and its optional suffix.
func getQuotedPart(str string) (*string, string) {
	// Accumulate into a byte slice so that decoding stays linear in the
	// length of the literal.
	unescaped := make([]byte, 0, len(str))
	i := 0
	for i < len(str) && str[i] != '"' {
		if str[i] != '\\' {
			unescaped = append(unescaped, str[i])
			i++
			continue
		}
		// A backslash needs a following byte; a trailing backslash means
		// the literal was cut short.
		if i+1 >= len(str) {
			return nil, str
		}
		switch str[i+1] {
		case '\\':
			unescaped = append(unescaped, '\\')
		case 'r':
			unescaped = append(unescaped, '\r')
		case 'n':
			unescaped = append(unescaped, '\n')
		case 't':
			unescaped = append(unescaped, '\t')
		case '"':
			unescaped = append(unescaped, '"')
		default:
			return nil, str
		}
		i += 2
	}
	if i >= len(str) {
		// Ran off the end without seeing the closing quote.
		return nil, str
	}
	out := string(unescaped)

	// Step over the closing quote and drop any type or language suffix.
	rest := str[i+1:]
	var remainder string
	switch {
	case strings.HasPrefix(rest, "^^<"):
		// Ignore type, for now.
		_, remainder = getUriPart(rest[3:])
	case strings.HasPrefix(rest, "@"):
		_, remainder = getUnquotedPart(rest[1:])
	default:
		remainder = rest
	}
	return &out, remainder
}
// getUnquotedPart reads a bare token from the front of str. The token ends
// at the first whitespace byte (see isWhitespace) or at the end of the
// input. Double-quoted sections embedded in the token are decoded with
// getQuotedPart and spliced into the result, so whitespace inside quotes
// does not end the token.
//
// It returns the token and the unconsumed remainder. If an embedded quoted
// section is invalid the component is nil and the original input is
// returned unchanged.
func getUnquotedPart(str string) (*string, string) {
	initStr := str
	out := ""
	i := 0
	for i < len(str) && !isWhitespace(str[i]) {
		if str[i] != '"' {
			i++
			continue
		}
		part, remainder := getQuotedPart(str[i+1:])
		if part == nil {
			return nil, initStr
		}
		out += str[:i] + *part
		// Resume scanning at the first byte after the quoted section. That
		// byte must go through the loop condition like any other, so that
		// whitespace (or the end of input) right after the closing quote
		// ends the token instead of being consumed blindly.
		str = remainder
		i = 0
	}
	out += str[:i]
	return &out, str[i:]
}
// ReadNQuadsFromReader reads reader line by line, parses each line with
// Parse and sends every non-nil result on c. Lines that Parse rejects
// (comments, malformed statements) are skipped silently. When the input is
// exhausted the number of triples sent is logged and c is closed.
//
// A read error other than io.EOF is reported through glog.Fatalln.
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) {
	bf := bufio.NewReader(reader)
	nTriples := 0

	// emit parses one complete line and forwards the triple, if any.
	emit := func(text string) {
		if triple := Parse(text); triple != nil {
			nTriples++
			c <- triple
		}
	}

	line := ""
	for {
		l, pre, err := bf.ReadLine()
		if err == io.EOF {
			break
		}
		if err != nil {
			glog.Fatalln("Something bad happened while reading file " + err.Error())
		}
		line += string(l)
		if pre {
			// The line did not fit in the reader's buffer; keep collecting
			// fragments until the rest of it arrives.
			continue
		}
		emit(line)
		line = ""
	}
	// ReadLine can report a prefix fragment and then io.EOF when the final,
	// unterminated line exactly fills the buffer. Parse whatever was
	// collected so that such a last line is not lost.
	if line != "" {
		emit(line)
	}
	glog.Infoln("Read", nTriples, "triples")
	close(c)
}