Create quads hierarchy

* Move nquads into quad.
* Create cquads simplified parser in quad.
* Move Triple (renamed Quad) to quad.

Also made sure mongo actually implements BulkLoader.
This commit is contained in:
kortschak 2014-07-27 17:42:45 +09:30
parent 01bc63810b
commit 401c58426f
51 changed files with 13400 additions and 5495 deletions

95
quad/cquads/actions.rl Normal file
View file

@ -0,0 +1,95 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
%%{
machine quads;
action Escape {
isEscaped = true
}
action Quote {
isQuoted = true
}
action StartSubject {
subject = p
}
action StartPredicate {
predicate = p
}
action StartObject {
object = p
}
action StartLabel {
label = p
}
action SetSubject {
if subject < 0 {
panic("unexpected parser state: subject start not set")
}
q.Subject = unEscape(data[subject:p], isQuoted, isEscaped)
isEscaped = false
isQuoted = false
}
action SetPredicate {
if predicate < 0 {
panic("unexpected parser state: predicate start not set")
}
q.Predicate = unEscape(data[predicate:p], isQuoted, isEscaped)
isEscaped = false
isQuoted = false
}
action SetObject {
if object < 0 {
panic("unexpected parser state: object start not set")
}
q.Object = unEscape(data[object:p], isQuoted, isEscaped)
isEscaped = false
isQuoted = false
}
action SetLabel {
if label < 0 {
panic("unexpected parser state: label start not set")
}
q.Provenance = unEscape(data[label:p], isQuoted, isEscaped)
isEscaped = false
isQuoted = false
}
action Return {
return q, nil
}
action Comment {
}
action Error {
if p < len(data) {
if r := data[p]; r < unicode.MaxASCII {
return q, fmt.Errorf("%v: unexpected rune %q at %d", quad.ErrInvalid, data[p], p)
} else {
return q, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", quad.ErrInvalid, data[p], data[p], p)
}
}
return q, quad.ErrIncomplete
}
}%%

141
quad/cquads/cquads.go Normal file
View file

@ -0,0 +1,141 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package cquads implements parsing N-Quads like line-based syntax
// for RDF datasets.
//
// N-Quad parsing is performed as based on a simplified grammar derived from
// the N-Quads grammar defined by http://www.w3.org/TR/n-quads/.
//
// For a complete definition of the grammar, see cquads.rl.
package cquads
import (
"bufio"
"bytes"
"fmt"
"io"
"strconv"
"github.com/google/cayley/quad"
)
// Parse returns a valid quad.Quad or a non-nil error. Parse does
// handle comments except where the comment placement does not prevent
// a complete valid quad.Quad from being defined.
func Parse(str string) (*quad.Quad, error) {
q, err := parse([]rune(str))
return &q, err
}
// Decoder implements simplified N-Quad document parsing.
type Decoder struct {
r *bufio.Reader
line []byte
}
// NewDecoder returns an N-Quad decoder that takes its input from the
// provided io.Reader.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r: bufio.NewReader(r)}
}
// Unmarshal returns the next valid N-Quad as a quad.Quad, or an error.
func (dec *Decoder) Unmarshal() (*quad.Quad, error) {
dec.line = dec.line[:0]
var line []byte
for {
for {
l, pre, err := dec.r.ReadLine()
if err != nil {
return nil, err
}
dec.line = append(dec.line, l...)
if !pre {
break
}
}
if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' {
break
}
dec.line = dec.line[:0]
}
triple, err := Parse(string(line))
if err != nil {
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
}
if triple == nil {
return dec.Unmarshal()
}
return triple, nil
}
func unEscape(r []rune, isQuoted, isEscaped bool) string {
if isQuoted {
r = r[1 : len(r)-1]
}
if !isEscaped {
return string(r)
}
buf := bytes.NewBuffer(make([]byte, 0, len(r)))
for i := 0; i < len(r); {
switch r[i] {
case '\\':
i++
var c byte
switch r[i] {
case 't':
c = '\t'
case 'b':
c = '\b'
case 'n':
c = '\n'
case 'r':
c = '\r'
case 'f':
c = '\f'
case '"':
c = '"'
case '\'':
c = '\''
case '\\':
c = '\\'
case 'u':
rc, err := strconv.ParseInt(string(r[i+1:i+5]), 16, 32)
if err != nil {
panic(fmt.Errorf("internal parser error: %v", err))
}
buf.WriteRune(rune(rc))
i += 5
continue
case 'U':
rc, err := strconv.ParseInt(string(r[i+1:i+9]), 16, 32)
if err != nil {
panic(fmt.Errorf("internal parser error: %v", err))
}
buf.WriteRune(rune(rc))
i += 9
continue
}
buf.WriteByte(c)
default:
buf.WriteRune(r[i])
}
i++
}
return buf.String()
}

106
quad/cquads/cquads.rl Normal file
View file

@ -0,0 +1,106 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Ragel gramar definition derived from http://www.w3.org/TR/n-quads/#sec-grammar.
%%{
machine quads;
alphtype rune;
PN_CHARS_BASE = [A-Za-z]
| 0x00c0 .. 0x00d6
| 0x00d8 .. 0x00f6
| 0x00f8 .. 0x02ff
| 0x0370 .. 0x037d
| 0x037f .. 0x1fff
| 0x200c .. 0x200d
| 0x2070 .. 0x218f
| 0x2c00 .. 0x2fef
| 0x3001 .. 0xd7ff
| 0xf900 .. 0xfdcf
| 0xfdf0 .. 0xfffd
| 0x10000 .. 0xeffff
;
PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;
PN_CHARS = PN_CHARS_U
| '-'
| [0-9]
| 0xb7
| 0x0300 .. 0x036f
| 0x203f .. 0x2040
;
ECHAR = ('\\' [tbnrf"'\\]) %Escape ;
UCHAR = ('\\u' xdigit {4}
| '\\U' xdigit {8}) %Escape
;
BLANK_NODE_LABEL = '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? ;
STRING_LITERAL = (
'!'
| '#' .. '['
| ']' .. 0x7e
| 0x80 .. 0x10ffff
| ECHAR
| UCHAR)+ - ('_:' | any* '.' | '#' any*)
;
STRING_LITERAL_QUOTE = '"' (
0x00 .. 0x09
| 0x0b .. 0x0c
| 0x0e .. '!'
| '#' .. '['
| ']' .. 0x10ffff
| ECHAR
| UCHAR)*
'"'
;
IRIREF = '<' (
'!' .. ';'
| '='
| '?' .. '['
| ']'
| '_'
| 'a' .. 'z'
| '~'
| 0x80 .. 0x10ffff
| UCHAR)*
'>'
;
LANGTAG = '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;
whitespace = [ \t] ;
literal = STRING_LITERAL | STRING_LITERAL_QUOTE % Quote | STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG) ;
subject = (literal | BLANK_NODE_LABEL) ;
predicate = literal ;
object = (literal | BLANK_NODE_LABEL) ;
graphLabel = (literal | BLANK_NODE_LABEL) ;
statement := (
whitespace* subject >StartSubject %SetSubject
whitespace+ predicate >StartPredicate %SetPredicate
whitespace+ object >StartObject %SetObject
(whitespace+ graphLabel >StartLabel %SetLabel)?
whitespace* '.' whitespace* ('#' any*)? >Comment
) %Return @!Error ;
}%%

782
quad/cquads/cquads_test.go Normal file

File diff suppressed because it is too large Load diff

6692
quad/cquads/parse.go Normal file

File diff suppressed because it is too large Load diff

58
quad/cquads/parse.rl Normal file
View file

@ -0,0 +1,58 @@
// GO SOURCE FILE MACHINE GENERATED BY RAGEL; DO NOT EDIT
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cquads
import (
"fmt"
"unicode"
"github.com/google/cayley/quad"
)
%%{
machine quads;
include "actions.rl";
include "cquads.rl";
write data;
}%%
func parse(data []rune) (quad.Quad, error) {
var (
cs, p int
pe = len(data)
eof = pe
subject = -1
predicate = -1
object = -1
label = -1
isEscaped bool
isQuoted bool
q quad.Quad
)
%%write init;
%%write exec;
return quad.Quad{}, quad.ErrInvalid
}