Create quads hierarchy
* Move nquads into quad. * Create cquads simplified parser in quad. * Move Triple (renamed Quad) to quad. Also made sure mongo actually implements BulkLoader.
This commit is contained in:
parent
01bc63810b
commit
401c58426f
51 changed files with 13400 additions and 5495 deletions
95
quad/cquads/actions.rl
Normal file
95
quad/cquads/actions.rl
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
%%{
	machine quads;

	# Escape records that the term being scanned contains a backslash escape.
	action Escape {
		isEscaped = true
	}

	# Quote records that the term being scanned is a bare quoted string.
	action Quote {
		isQuoted = true
	}

	# The Start actions remember the offset at which each term begins.
	action StartSubject {
		subject = p
	}

	action StartPredicate {
		predicate = p
	}

	action StartObject {
		object = p
	}

	action StartLabel {
		label = p
	}

	# The Set actions store the term that ends at p and then clear the
	# per-term flags so that they do not leak into the next term.
	action SetSubject {
		if subject < 0 {
			panic("unexpected parser state: subject start not set")
		}
		q.Subject = unEscape(data[subject:p], isQuoted, isEscaped)
		isQuoted, isEscaped = false, false
	}

	action SetPredicate {
		if predicate < 0 {
			panic("unexpected parser state: predicate start not set")
		}
		q.Predicate = unEscape(data[predicate:p], isQuoted, isEscaped)
		isQuoted, isEscaped = false, false
	}

	action SetObject {
		if object < 0 {
			panic("unexpected parser state: object start not set")
		}
		q.Object = unEscape(data[object:p], isQuoted, isEscaped)
		isQuoted, isEscaped = false, false
	}

	action SetLabel {
		if label < 0 {
			panic("unexpected parser state: label start not set")
		}
		q.Provenance = unEscape(data[label:p], isQuoted, isEscaped)
		isQuoted, isEscaped = false, false
	}

	# Return hands the completed quad back to the caller of parse.
	action Return {
		return q, nil
	}

	# Comment is intentionally empty.
	action Comment {
	}

	# Error reports the offending rune and its offset, or ErrIncomplete when
	# the input ended before a full statement was seen.
	action Error {
		if p >= len(data) {
			return q, quad.ErrIncomplete
		}
		if r := data[p]; r >= unicode.MaxASCII {
			return q, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", quad.ErrInvalid, data[p], data[p], p)
		}
		return q, fmt.Errorf("%v: unexpected rune %q at %d", quad.ErrInvalid, data[p], p)
	}
}%%
|
||||
141
quad/cquads/cquads.go
Normal file
141
quad/cquads/cquads.go
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package cquads implements parsing N-Quads like line-based syntax
|
||||
// for RDF datasets.
|
||||
//
|
||||
// N-Quad parsing is based on a simplified grammar derived from
|
||||
// the N-Quads grammar defined by http://www.w3.org/TR/n-quads/.
|
||||
//
|
||||
// For a complete definition of the grammar, see cquads.rl.
|
||||
package cquads
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
// Parse returns a valid quad.Quad or a non-nil error. Parse does
|
||||
// handle comments except where the comment placement does not prevent
|
||||
// a complete valid quad.Quad from being defined.
|
||||
func Parse(str string) (*quad.Quad, error) {
|
||||
q, err := parse([]rune(str))
|
||||
return &q, err
|
||||
}
|
||||
|
||||
// Decoder implements simplified N-Quad document parsing.
|
||||
type Decoder struct {
|
||||
r *bufio.Reader
|
||||
line []byte
|
||||
}
|
||||
|
||||
// NewDecoder returns an N-Quad decoder that takes its input from the
|
||||
// provided io.Reader.
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
return &Decoder{r: bufio.NewReader(r)}
|
||||
}
|
||||
|
||||
// Unmarshal returns the next valid N-Quad as a quad.Quad, or an error.
|
||||
func (dec *Decoder) Unmarshal() (*quad.Quad, error) {
|
||||
dec.line = dec.line[:0]
|
||||
var line []byte
|
||||
for {
|
||||
for {
|
||||
l, pre, err := dec.r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dec.line = append(dec.line, l...)
|
||||
if !pre {
|
||||
break
|
||||
}
|
||||
}
|
||||
if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' {
|
||||
break
|
||||
}
|
||||
dec.line = dec.line[:0]
|
||||
}
|
||||
triple, err := Parse(string(line))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
|
||||
}
|
||||
if triple == nil {
|
||||
return dec.Unmarshal()
|
||||
}
|
||||
return triple, nil
|
||||
}
|
||||
|
||||
// unEscape converts the runes of a scanned term into its string value.
//
// When isQuoted is set the first and last runes (the surrounding quotes) are
// dropped. When isEscaped is set, backslash escapes (\t \b \n \r \f \" \' \\)
// and the numeric forms \uXXXX and \UXXXXXXXX are decoded; otherwise the
// runes are returned unchanged.
//
// The input is expected to have been validated by the grammar; a truncated
// escape sequence causes a run-time panic.
func unEscape(r []rune, isQuoted, isEscaped bool) string {
	if isQuoted {
		r = r[1 : len(r)-1]
	}
	if !isEscaped {
		return string(r)
	}

	buf := bytes.NewBuffer(make([]byte, 0, len(r)))

	for i := 0; i < len(r); {
		if r[i] != '\\' {
			buf.WriteRune(r[i])
			i++
			continue
		}

		// Step over the backslash and decode the escape that follows it.
		i++
		var c byte
		switch r[i] {
		case 't':
			c = '\t'
		case 'b':
			c = '\b'
		case 'n':
			c = '\n'
		case 'r':
			c = '\r'
		case 'f':
			c = '\f'
		case '"':
			c = '"'
		case '\'':
			c = '\''
		case '\\':
			c = '\\'
		case 'u':
			buf.WriteRune(hexRune(r[i+1 : i+5]))
			i += 5
			continue
		case 'U':
			buf.WriteRune(hexRune(r[i+1 : i+9]))
			i += 9
			continue
		}
		buf.WriteByte(c)
		i++
	}

	return buf.String()
}

// hexRune decodes the hexadecimal digits of a \u or \U escape. Values above
// the largest Unicode code point, which eight hex digits can express, yield
// U+FFFD rather than a panic. It panics only if digits is not hexadecimal,
// which the grammar rules out.
func hexRune(digits []rune) rune {
	v, err := strconv.ParseUint(string(digits), 16, 32)
	if err != nil {
		panic(fmt.Errorf("internal parser error: %v", err))
	}
	if v > 0x10ffff {
		return '\ufffd'
	}
	return rune(v)
}
|
||||
106
quad/cquads/cquads.rl
Normal file
106
quad/cquads/cquads.rl
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Ragel grammar definition derived from http://www.w3.org/TR/n-quads/#sec-grammar.
|
||||
|
||||
// Machine definition for the simplified quad syntax. Every term position
// (subject, predicate, object, graph label) accepts a literal: an unquoted
// STRING_LITERAL, a quoted STRING_LITERAL_QUOTE, or a quoted string followed
// by '^^' IRIREF or a LANGTAG. Subject, object and graph label additionally
// accept a BLANK_NODE_LABEL. STRING_LITERAL excludes the exact string "_:",
// anything ending in '.', and anything starting with '#'. Terms are separated
// by at least one space or tab, and a statement ends with '.' optionally
// followed by a '#' comment.
%%{
|
||||
machine quads;
|
||||
|
||||
alphtype rune;
|
||||
|
||||
PN_CHARS_BASE = [A-Za-z]
|
||||
| 0x00c0 .. 0x00d6
|
||||
| 0x00d8 .. 0x00f6
|
||||
| 0x00f8 .. 0x02ff
|
||||
| 0x0370 .. 0x037d
|
||||
| 0x037f .. 0x1fff
|
||||
| 0x200c .. 0x200d
|
||||
| 0x2070 .. 0x218f
|
||||
| 0x2c00 .. 0x2fef
|
||||
| 0x3001 .. 0xd7ff
|
||||
| 0xf900 .. 0xfdcf
|
||||
| 0xfdf0 .. 0xfffd
|
||||
| 0x10000 .. 0xeffff
|
||||
;
|
||||
|
||||
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
|
||||
|
||||
PN_CHARS = PN_CHARS_U
|
||||
| '-'
|
||||
| [0-9]
|
||||
| 0xb7
|
||||
| 0x0300 .. 0x036f
|
||||
| 0x203f .. 0x2040
|
||||
;
|
||||
|
||||
ECHAR = ('\\' [tbnrf"'\\]) %Escape ;
|
||||
|
||||
UCHAR = ('\\u' xdigit {4}
|
||||
| '\\U' xdigit {8}) %Escape
|
||||
;
|
||||
|
||||
BLANK_NODE_LABEL = '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? ;
|
||||
|
||||
STRING_LITERAL = (
|
||||
'!'
|
||||
| '#' .. '['
|
||||
| ']' .. 0x7e
|
||||
| 0x80 .. 0x10ffff
|
||||
| ECHAR
|
||||
| UCHAR)+ - ('_:' | any* '.' | '#' any*)
|
||||
;
|
||||
|
||||
STRING_LITERAL_QUOTE = '"' (
|
||||
0x00 .. 0x09
|
||||
| 0x0b .. 0x0c
|
||||
| 0x0e .. '!'
|
||||
| '#' .. '['
|
||||
| ']' .. 0x10ffff
|
||||
| ECHAR
|
||||
| UCHAR)*
|
||||
'"'
|
||||
;
|
||||
|
||||
IRIREF = '<' (
|
||||
'!' .. ';'
|
||||
| '='
|
||||
| '?' .. '['
|
||||
| ']'
|
||||
| '_'
|
||||
| 'a' .. 'z'
|
||||
| '~'
|
||||
| 0x80 .. 0x10ffff
|
||||
| UCHAR)*
|
||||
'>'
|
||||
;
|
||||
|
||||
LANGTAG = '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;
|
||||
|
||||
whitespace = [ \t] ;
|
||||
|
||||
literal = STRING_LITERAL | STRING_LITERAL_QUOTE % Quote | STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG) ;
|
||||
|
||||
subject = (literal | BLANK_NODE_LABEL) ;
|
||||
predicate = literal ;
|
||||
object = (literal | BLANK_NODE_LABEL) ;
|
||||
graphLabel = (literal | BLANK_NODE_LABEL) ;
|
||||
|
||||
statement := (
|
||||
whitespace* subject >StartSubject %SetSubject
|
||||
whitespace+ predicate >StartPredicate %SetPredicate
|
||||
whitespace+ object >StartObject %SetObject
|
||||
(whitespace+ graphLabel >StartLabel %SetLabel)?
|
||||
whitespace* '.' whitespace* ('#' any*)? >Comment
|
||||
) %Return @!Error ;
|
||||
}%%
|
||||
782
quad/cquads/cquads_test.go
Normal file
782
quad/cquads/cquads_test.go
Normal file
File diff suppressed because it is too large
Load diff
6692
quad/cquads/parse.go
Normal file
6692
quad/cquads/parse.go
Normal file
File diff suppressed because it is too large
Load diff
58
quad/cquads/parse.rl
Normal file
58
quad/cquads/parse.rl
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
// GO SOURCE FILE MACHINE GENERATED BY RAGEL; DO NOT EDIT
|
||||
|
||||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package cquads
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
%%{
|
||||
machine quads;
|
||||
|
||||
include "actions.rl";
|
||||
|
||||
include "cquads.rl";
|
||||
|
||||
write data;
|
||||
}%%
|
||||
|
||||
// parse runs the quads state machine over data. The embedded Return and
// Error actions return q, together with any error, from inside the code
// that Ragel substitutes for the write directives below.
func parse(data []rune) (quad.Quad, error) {
|
||||
var (
|
||||
// NOTE(review): cs, p, pe and eof look like the variables that Ragel's
// generated code expects (state, position, end, end of input) -- confirm
// against the Ragel guide.
cs, p int
|
||||
pe = len(data)
|
||||
eof = pe
|
||||
|
||||
// Start offset of each term; -1 until the matching Start action records p.
subject = -1
|
||||
predicate = -1
|
||||
object = -1
|
||||
label = -1
|
||||
|
||||
// Flags raised by the Escape and Quote actions and cleared by each Set action.
isEscaped bool
|
||||
isQuoted bool
|
||||
|
||||
q quad.Quad
|
||||
)
|
||||
|
||||
%%write init;
|
||||
|
||||
%%write exec;
|
||||
|
||||
// NOTE(review): presumably reached only when the machine stops without an
// action returning -- confirm against the generated code.
return quad.Quad{}, quad.ErrInvalid
|
||||
}
|
||||
BIN
quad/nquad_tests.tar.gz
Normal file
BIN
quad/nquad_tests.tar.gz
Normal file
Binary file not shown.
87
quad/nquads/actions.rl
Normal file
87
quad/nquads/actions.rl
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
%%{
	machine quads;

	# Escape records that the term being scanned contains a backslash escape.
	action Escape {
		isEscaped = true
	}

	# The Start actions remember the offset at which each term begins.
	action StartSubject {
		subject = p
	}

	action StartPredicate {
		predicate = p
	}

	action StartObject {
		object = p
	}

	action StartLabel {
		label = p
	}

	# The Set actions store the term that ends at p and then clear the escape
	# flag so that it does not leak into the next term.
	action SetSubject {
		if subject < 0 {
			panic("unexpected parser state: subject start not set")
		}
		q.Subject, isEscaped = unEscape(data[subject:p], isEscaped), false
	}

	action SetPredicate {
		if predicate < 0 {
			panic("unexpected parser state: predicate start not set")
		}
		q.Predicate, isEscaped = unEscape(data[predicate:p], isEscaped), false
	}

	action SetObject {
		if object < 0 {
			panic("unexpected parser state: object start not set")
		}
		q.Object, isEscaped = unEscape(data[object:p], isEscaped), false
	}

	action SetLabel {
		if label < 0 {
			panic("unexpected parser state: label start not set")
		}
		q.Provenance, isEscaped = unEscape(data[label:p], isEscaped), false
	}

	# Return hands the completed quad back to the caller of parse.
	action Return {
		return q, nil
	}

	# Comment is intentionally empty.
	action Comment {
	}

	# Error reports the offending rune and its offset, or ErrIncomplete when
	# the input ended before a full statement was seen.
	action Error {
		if p >= len(data) {
			return q, quad.ErrIncomplete
		}
		if r := data[p]; r >= unicode.MaxASCII {
			return q, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", quad.ErrInvalid, data[p], data[p], p)
		}
		return q, fmt.Errorf("%v: unexpected rune %q at %d", quad.ErrInvalid, data[p], p)
	}
}%%
|
||||
138
quad/nquads/nquads.go
Normal file
138
quad/nquads/nquads.go
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package nquads implements parsing the RDF 1.1 N-Quads line-based syntax
|
||||
// for RDF datasets.
|
||||
//
|
||||
// N-Quad parsing is performed as defined by http://www.w3.org/TR/n-quads/
|
||||
// with the exception that the nquads package will allow relative IRI values,
|
||||
// which are prohibited by the N-Quads specification.
|
||||
package nquads
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
// Parse returns a valid quad.Quad or a non-nil error. Parse does
|
||||
// handle comments except where the comment placement does not prevent
|
||||
// a complete valid quad.Quad from being defined.
|
||||
func Parse(str string) (*quad.Quad, error) {
|
||||
q, err := parse([]rune(str))
|
||||
return &q, err
|
||||
}
|
||||
|
||||
// Decoder implements N-Quad document parsing according to the RDF
|
||||
// 1.1 N-Quads specification.
|
||||
type Decoder struct {
|
||||
r *bufio.Reader
|
||||
line []byte
|
||||
}
|
||||
|
||||
// NewDecoder returns an N-Quad decoder that takes its input from the
|
||||
// provided io.Reader.
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
return &Decoder{r: bufio.NewReader(r)}
|
||||
}
|
||||
|
||||
// Unmarshal returns the next valid N-Quad as a quad.Quad, or an error.
|
||||
func (dec *Decoder) Unmarshal() (*quad.Quad, error) {
|
||||
dec.line = dec.line[:0]
|
||||
var line []byte
|
||||
for {
|
||||
for {
|
||||
l, pre, err := dec.r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dec.line = append(dec.line, l...)
|
||||
if !pre {
|
||||
break
|
||||
}
|
||||
}
|
||||
if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' {
|
||||
break
|
||||
}
|
||||
dec.line = dec.line[:0]
|
||||
}
|
||||
triple, err := Parse(string(line))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
|
||||
}
|
||||
if triple == nil {
|
||||
return dec.Unmarshal()
|
||||
}
|
||||
return triple, nil
|
||||
}
|
||||
|
||||
// unEscape converts the runes of a scanned term into its string value.
//
// When isEscaped is set, backslash escapes (\t \b \n \r \f \" \' \\) and the
// numeric forms \uXXXX and \UXXXXXXXX are decoded; otherwise the runes are
// returned unchanged.
//
// The input is expected to have been validated by the grammar; a truncated
// escape sequence causes a run-time panic.
func unEscape(r []rune, isEscaped bool) string {
	if !isEscaped {
		return string(r)
	}

	buf := bytes.NewBuffer(make([]byte, 0, len(r)))

	for i := 0; i < len(r); {
		if r[i] != '\\' {
			buf.WriteRune(r[i])
			i++
			continue
		}

		// Step over the backslash and decode the escape that follows it.
		i++
		var c byte
		switch r[i] {
		case 't':
			c = '\t'
		case 'b':
			c = '\b'
		case 'n':
			c = '\n'
		case 'r':
			c = '\r'
		case 'f':
			c = '\f'
		case '"':
			c = '"'
		case '\'':
			c = '\''
		case '\\':
			c = '\\'
		case 'u':
			buf.WriteRune(hexRune(r[i+1 : i+5]))
			i += 5
			continue
		case 'U':
			buf.WriteRune(hexRune(r[i+1 : i+9]))
			i += 9
			continue
		}
		buf.WriteByte(c)
		i++
	}

	return buf.String()
}

// hexRune decodes the hexadecimal digits of a \u or \U escape. Values above
// the largest Unicode code point, which eight hex digits can express, yield
// U+FFFD rather than a panic. It panics only if digits is not hexadecimal,
// which the grammar rules out.
func hexRune(digits []rune) rune {
	v, err := strconv.ParseUint(string(digits), 16, 32)
	if err != nil {
		panic(fmt.Errorf("internal parser error: %v", err))
	}
	if v > 0x10ffff {
		return '\ufffd'
	}
	return rune(v)
}
|
||||
97
quad/nquads/nquads.rl
Normal file
97
quad/nquads/nquads.rl
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Ragel grammar definition derived from http://www.w3.org/TR/n-quads/#sec-grammar.
|
||||
|
||||
// Machine definition for N-Quads statements: the subject and graph label are
// an IRIREF or BLANK_NODE_LABEL, the predicate is an IRIREF, and the object
// is an IRIREF, a BLANK_NODE_LABEL or a quoted literal with an optional
// '^^' IRIREF or LANGTAG suffix. Whitespace between terms is optional, and a
// statement ends with '.' optionally followed by a '#' comment.
%%{
|
||||
machine quads;
|
||||
|
||||
alphtype rune;
|
||||
|
||||
PN_CHARS_BASE = [A-Za-z]
|
||||
| 0x00c0 .. 0x00d6
|
||||
| 0x00d8 .. 0x00f6
|
||||
| 0x00f8 .. 0x02ff
|
||||
| 0x0370 .. 0x037d
|
||||
| 0x037f .. 0x1fff
|
||||
| 0x200c .. 0x200d
|
||||
| 0x2070 .. 0x218f
|
||||
| 0x2c00 .. 0x2fef
|
||||
| 0x3001 .. 0xd7ff
|
||||
| 0xf900 .. 0xfdcf
|
||||
| 0xfdf0 .. 0xfffd
|
||||
| 0x10000 .. 0xeffff
|
||||
;
|
||||
|
||||
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
|
||||
|
||||
PN_CHARS = PN_CHARS_U
|
||||
| '-'
|
||||
| [0-9]
|
||||
| 0xb7
|
||||
| 0x0300 .. 0x036f
|
||||
| 0x203f .. 0x2040
|
||||
;
|
||||
|
||||
ECHAR = ('\\' [tbnrf"'\\]) %Escape ;
|
||||
|
||||
UCHAR = ('\\u' xdigit {4}
|
||||
| '\\U' xdigit {8}) %Escape
|
||||
;
|
||||
|
||||
BLANK_NODE_LABEL = '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? ;
|
||||
|
||||
STRING_LITERAL_QUOTE = '"' (
|
||||
0x00 .. 0x09
|
||||
| 0x0b .. 0x0c
|
||||
| 0x0e .. '!'
|
||||
| '#' .. '['
|
||||
| ']' .. 0x10ffff
|
||||
| ECHAR
|
||||
| UCHAR)*
|
||||
'"'
|
||||
;
|
||||
|
||||
IRIREF = '<' (
|
||||
'!' .. ';'
|
||||
| '='
|
||||
| '?' .. '['
|
||||
| ']'
|
||||
| '_'
|
||||
| 'a' .. 'z'
|
||||
| '~'
|
||||
| 0x80 .. 0x10ffff
|
||||
| UCHAR)*
|
||||
'>'
|
||||
;
|
||||
|
||||
LANGTAG = '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;
|
||||
|
||||
whitespace = [ \t] ;
|
||||
|
||||
literal = STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)? ;
|
||||
|
||||
subject = IRIREF | BLANK_NODE_LABEL ;
|
||||
predicate = IRIREF ;
|
||||
object = IRIREF | BLANK_NODE_LABEL | literal ;
|
||||
graphLabel = IRIREF | BLANK_NODE_LABEL ;
|
||||
|
||||
statement := (
|
||||
whitespace* subject >StartSubject %SetSubject
|
||||
whitespace* predicate >StartPredicate %SetPredicate
|
||||
whitespace* object >StartObject %SetObject
|
||||
(whitespace* graphLabel >StartLabel %SetLabel)?
|
||||
whitespace* '.' whitespace* ('#' any*)? >Comment
|
||||
) %Return @!Error ;
|
||||
}%%
|
||||
589
quad/nquads/nquads_test.go
Normal file
589
quad/nquads/nquads_test.go
Normal file
File diff suppressed because it is too large
Load diff
3652
quad/nquads/parse.go
Normal file
3652
quad/nquads/parse.go
Normal file
File diff suppressed because it is too large
Load diff
57
quad/nquads/parse.rl
Normal file
57
quad/nquads/parse.rl
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
// GO SOURCE FILE MACHINE GENERATED BY RAGEL; DO NOT EDIT
|
||||
|
||||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nquads
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
%%{
|
||||
machine quads;
|
||||
|
||||
include "actions.rl";
|
||||
|
||||
include "nquads.rl";
|
||||
|
||||
write data;
|
||||
}%%
|
||||
|
||||
// parse runs the quads state machine over data. The embedded Return and
// Error actions return q, together with any error, from inside the code
// that Ragel substitutes for the write directives below.
func parse(data []rune) (quad.Quad, error) {
|
||||
var (
|
||||
// NOTE(review): cs, p, pe and eof look like the variables that Ragel's
// generated code expects (state, position, end, end of input) -- confirm
// against the Ragel guide.
cs, p int
|
||||
pe = len(data)
|
||||
eof = pe
|
||||
|
||||
// Start offset of each term; -1 until the matching Start action records p.
subject = -1
|
||||
predicate = -1
|
||||
object = -1
|
||||
label = -1
|
||||
|
||||
// Flag raised by the Escape action and cleared by each Set action.
isEscaped bool
|
||||
|
||||
q quad.Quad
|
||||
)
|
||||
|
||||
%%write init;
|
||||
|
||||
%%write exec;
|
||||
|
||||
// NOTE(review): presumably reached only when the machine stops without an
// action returning -- confirm against the generated code.
return quad.Quad{}, quad.ErrInvalid
|
||||
}
|
||||
226
quad/nquads/quadfix.go
Normal file
226
quad/nquads/quadfix.go
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
)
|
||||
|
||||
func main() {
|
||||
dec := NewDecoder(os.Stdin)
|
||||
for {
|
||||
t, err := dec.Unmarshal()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return
|
||||
}
|
||||
log.Println(err)
|
||||
}
|
||||
if t.Subject[0] == ':' && t.Subject[1] == '/' {
|
||||
t.Subject = "<" + t.Subject[1:] + ">"
|
||||
} else {
|
||||
t.Subject = "_" + t.Subject
|
||||
}
|
||||
if t.Object[0] == ':' {
|
||||
if t.Object[1] == '/' {
|
||||
t.Object = "<" + t.Object[1:] + ">"
|
||||
} else {
|
||||
t.Object = "_" + t.Object
|
||||
}
|
||||
} else if t.Object[0] == '/' {
|
||||
t.Object = "<" + t.Object + ">"
|
||||
} else {
|
||||
t.Object = fmt.Sprintf(`%q`, t.Object)
|
||||
}
|
||||
fmt.Printf("%s <%s> %s .\n", t.Subject, t.Predicate, t.Object)
|
||||
}
|
||||
}
|
||||
|
||||
// Historical N-Quads parser code.
|
||||
// -------------------------------
|
||||
|
||||
// Errors returned by the historical Parse function.
var (
	// ErrAbsentSubject is returned when no subject can be read.
	ErrAbsentSubject = errors.New("nquads: absent subject")
	// ErrAbsentPredicate is returned when no predicate can be read.
	ErrAbsentPredicate = errors.New("nquads: absent predicate")
	// ErrAbsentObject is returned when no object can be read.
	ErrAbsentObject = errors.New("nquads: absent object")
	// ErrUnterminated is returned when the statement lacks its closing '.'.
	ErrUnterminated = errors.New("nquads: unterminated quad")
)
|
||||
|
||||
func Parse(str string) (*graph.Triple, error) {
|
||||
// Skip leading whitespace.
|
||||
str = trimSpace(str)
|
||||
// Check for a comment
|
||||
if str != "" && str[0] == '#' {
|
||||
return nil, nil
|
||||
}
|
||||
sub, remainder := getTripleComponent(str)
|
||||
if sub == "" {
|
||||
return nil, ErrAbsentSubject
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
pred, remainder := getTripleComponent(str)
|
||||
if pred == "" {
|
||||
return nil, ErrAbsentPredicate
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
obj, remainder := getTripleComponent(str)
|
||||
if obj == "" {
|
||||
return nil, ErrAbsentObject
|
||||
}
|
||||
str = trimSpace(remainder)
|
||||
prov, remainder := getTripleComponent(str)
|
||||
str = trimSpace(remainder)
|
||||
if str != "" && str[0] == '.' {
|
||||
return &graph.Triple{sub, pred, obj, prov}, nil
|
||||
}
|
||||
return nil, ErrUnterminated
|
||||
}
|
||||
|
||||
// isSpace reports whether s is a space, a tab or a carriage return.
func isSpace(s uint8) bool {
	switch s {
	case ' ', '\t', '\r':
		return true
	}
	return false
}
|
||||
|
||||
// trimSpace returns str without its leading spaces, tabs and carriage
// returns. Trailing whitespace is left in place.
func trimSpace(str string) string {
	return strings.TrimLeft(str, " \t\r")
}
|
||||
|
||||
func getTripleComponent(str string) (head, tail string) {
|
||||
if len(str) == 0 {
|
||||
return "", str
|
||||
}
|
||||
if str[0] == '<' {
|
||||
return getUriPart(str[1:])
|
||||
} else if str[0] == '"' {
|
||||
return getQuotedPart(str[1:])
|
||||
} else if str[0] == '.' {
|
||||
return "", str
|
||||
} else {
|
||||
// Technically not part of the spec. But we do it anyway for convenience.
|
||||
return getUnquotedPart(str)
|
||||
}
|
||||
}
|
||||
|
||||
// getUriPart returns the text before the first '>' in str as head and the
// text after it as tail. When str has no '>' it returns an empty head and
// str unchanged.
func getUriPart(str string) (head, tail string) {
	end := strings.IndexByte(str, '>')
	if end < 0 {
		return "", str
	}
	return str[:end], str[end+1:]
}
|
||||
|
||||
func getQuotedPart(str string) (head, tail string) {
|
||||
var (
|
||||
i int
|
||||
start int
|
||||
)
|
||||
for i < len(str) && str[i] != '"' {
|
||||
if str[i] == '\\' {
|
||||
head += str[start:i]
|
||||
switch str[i+1] {
|
||||
case '\\':
|
||||
head += "\\"
|
||||
case 'r':
|
||||
head += "\r"
|
||||
case 'n':
|
||||
head += "\n"
|
||||
case 't':
|
||||
head += "\t"
|
||||
case '"':
|
||||
head += "\""
|
||||
default:
|
||||
return "", str
|
||||
}
|
||||
i += 2
|
||||
start = i
|
||||
continue
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
if i == len(str) {
|
||||
return "", str
|
||||
}
|
||||
head += str[start:i]
|
||||
i += 1
|
||||
switch {
|
||||
case strings.HasPrefix(str[i:], "^^<"):
|
||||
// Ignore type, for now
|
||||
_, tail = getUriPart(str[i+3:])
|
||||
case str[i] == '@':
|
||||
_, tail = getUnquotedPart(str[i+1:])
|
||||
default:
|
||||
tail = str[i:]
|
||||
}
|
||||
|
||||
return head, tail
|
||||
}
|
||||
|
||||
func getUnquotedPart(str string) (head, tail string) {
|
||||
var (
|
||||
i int
|
||||
initStr = str
|
||||
start int
|
||||
)
|
||||
for i < len(str) && !isSpace(str[i]) {
|
||||
if str[i] == '"' {
|
||||
part, remainder := getQuotedPart(str[i+1:])
|
||||
if part == "" {
|
||||
return part, initStr
|
||||
}
|
||||
head += str[start:i]
|
||||
str = remainder
|
||||
i = 0
|
||||
start = 0
|
||||
head += part
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
head += str[start:i]
|
||||
return head, str[i:]
|
||||
}
|
||||
|
||||
type Decoder struct {
|
||||
r *bufio.Reader
|
||||
line []byte
|
||||
}
|
||||
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
return &Decoder{r: bufio.NewReader(r)}
|
||||
}
|
||||
|
||||
func (dec *Decoder) Unmarshal() (*graph.Triple, error) {
|
||||
dec.line = dec.line[:0]
|
||||
for {
|
||||
l, pre, err := dec.r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dec.line = append(dec.line, l...)
|
||||
if !pre {
|
||||
break
|
||||
}
|
||||
}
|
||||
triple, err := Parse(string(dec.line))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
|
||||
}
|
||||
if triple == nil {
|
||||
return dec.Unmarshal()
|
||||
}
|
||||
return triple, nil
|
||||
}
|
||||
BIN
quad/ntriple_tests.tar.gz
Normal file
BIN
quad/ntriple_tests.tar.gz
Normal file
Binary file not shown.
152
quad/quad.go
Normal file
152
quad/quad.go
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package quad defines quad and triple handling.
|
||||
package quad
|
||||
|
||||
// Defines the struct which makes the TripleStore possible -- the triple.
|
||||
//
|
||||
// At its heart, it consists of three fields -- Subject, Predicate, and Object.
|
||||
// Three IDs that relate to each other. That's all there is to it. The triples
|
||||
// are the links in the graph, and the existence of node IDs is defined by the
|
||||
// fact that some triple in the graph mentions them.
|
||||
//
|
||||
// This means that a complete representation of the graph is equivalent to a
|
||||
// list of triples. The rest is just indexing for speed.
|
||||
//
|
||||
// Adding fields to the triple is not to be taken lightly. You'll see I mention
|
||||
// provenance, but don't as yet use it in any backing store. In general, there
|
||||
// can be features that can be turned on or off for any store, but I haven't
|
||||
// decided how to allow/disallow them yet. Another such example would be to add
|
||||
// a forward and reverse index field -- forward being "order the list of
|
||||
// objects pointed at by this subject with this predicate" such as first and
|
||||
// second children, top billing, what have you.
|
||||
//
|
||||
// There will never be that much in this file except for the definition, but
|
||||
// the consequences are not to be taken lightly. But do suggest cool features!
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// ErrInvalid is the error value for input that is not a valid N-Quad.
var ErrInvalid = errors.New("invalid N-Quad")

// ErrIncomplete is the error value for input that ends before an N-Quad is
// complete.
var ErrIncomplete = errors.New("incomplete N-Quad")
|
||||
|
||||
// Quad is a single statement: a subject, predicate and object plus an
// optional provenance value. The provenance is omitted from JSON output
// when empty.
type Quad struct {
	Subject    string `json:"subject"`
	Predicate  string `json:"predicate"`
	Object     string `json:"object"`
	Provenance string `json:"provenance,omitempty"`
}
|
||||
|
||||
// Direction specifies an edge's type.
type Direction byte

// List of the valid directions of a quad.
const (
	Any Direction = iota
	Subject
	Predicate
	Object
	Provenance
)

// Prefix returns the single-byte code for d: 'a', 's', 'p', 'o' or 'c' for
// Any, Subject, Predicate, Object and Provenance. It returns a zero byte for
// any other value.
func (d Direction) Prefix() byte {
	switch d {
	case Any:
		return 'a'
	case Subject:
		return 's'
	case Predicate:
		return 'p'
	case Provenance:
		return 'c'
	case Object:
		return 'o'
	default:
		return '\x00'
	}
}

// String returns the lower-case name of d, or a message carrying the numeric
// value when d is not one of the declared directions.
func (d Direction) String() string {
	switch d {
	case Any:
		return "any"
	case Subject:
		return "subject"
	case Predicate:
		return "predicate"
	case Provenance:
		return "provenance"
	case Object:
		return "object"
	default:
		// Sprintf is used because Sprint adds no space after a string
		// operand, which ran the label and the number together.
		return fmt.Sprintf("illegal direction: %d", byte(d))
	}
}
|
||||
|
||||
// TODO(kortschak) Consider writing methods onto the concrete type
|
||||
// instead of the pointer. This needs benchmarking to make the decision.
|
||||
|
||||
// Per-field accessor for triples
|
||||
func (q *Quad) Get(d Direction) string {
|
||||
switch d {
|
||||
case Subject:
|
||||
return q.Subject
|
||||
case Predicate:
|
||||
return q.Predicate
|
||||
case Provenance:
|
||||
return q.Provenance
|
||||
case Object:
|
||||
return q.Object
|
||||
default:
|
||||
panic(d.String())
|
||||
}
|
||||
}
|
||||
|
||||
func (q *Quad) Equals(o *Quad) bool {
|
||||
return *q == *o
|
||||
}
|
||||
|
||||
// Pretty-prints a triple.
|
||||
func (q *Quad) String() string {
|
||||
// TODO(kortschak) String methods should generally not terminate in '\n'.
|
||||
return fmt.Sprintf("%s -- %s -> %s\n", q.Subject, q.Predicate, q.Object)
|
||||
}
|
||||
|
||||
func (q *Quad) IsValid() bool {
|
||||
return q.Subject != "" && q.Predicate != "" && q.Object != ""
|
||||
}
|
||||
|
||||
// TODO(kortschak) NTriple looks like a good candidate for conversion
|
||||
// to MarshalText() (text []byte, err error) and then move parsing code
|
||||
// from nquads to here to provide UnmarshalText(text []byte) error.
|
||||
|
||||
// Prints a triple in N-Quad format.
|
||||
func (q *Quad) NTriple() string {
|
||||
if q.Provenance == "" {
|
||||
//TODO(barakmich): Proper escaping.
|
||||
return fmt.Sprintf("%s %s %s .", q.Subject, q.Predicate, q.Object)
|
||||
} else {
|
||||
return fmt.Sprintf("%s %s %s %s .", q.Subject, q.Predicate, q.Object, q.Provenance)
|
||||
}
|
||||
}
|
||||
|
||||
type Unmarshaler interface {
|
||||
Unmarshal() (*Quad, error)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue