Create quads hierarchy
* Move nquads into quad. * Create cquads simplified parser in quad. * Move Triple (renamed Quad) to quad. Also made sure mongo actually implements BulkLoader.
This commit is contained in:
parent
01bc63810b
commit
401c58426f
51 changed files with 13400 additions and 5495 deletions
95
quad/cquads/actions.rl
Normal file
95
quad/cquads/actions.rl
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
%%{
	machine quads;

	# Flag actions: record that the term being scanned contained an escape
	# sequence, or that it is a plain quoted literal. Both flags are consumed
	# and cleared by the Set* actions below.
	action Escape {
		isEscaped = true
	}

	action Quote {
		isQuoted = true
	}

	# Start* actions: remember the offset in data at which each term begins.
	action StartSubject {
		subject = p
	}

	action StartPredicate {
		predicate = p
	}

	action StartObject {
		object = p
	}

	action StartLabel {
		label = p
	}

	# Set* actions: slice the term out of data from its recorded start up to
	# the current position, unescape it, store it on q and clear the flags.
	# A negative start offset means the matching Start* action never ran,
	# which is reported as a panic.
	action SetSubject {
		if subject < 0 {
			panic("unexpected parser state: subject start not set")
		}
		q.Subject = unEscape(data[subject:p], isQuoted, isEscaped)
		isEscaped, isQuoted = false, false
	}

	action SetPredicate {
		if predicate < 0 {
			panic("unexpected parser state: predicate start not set")
		}
		q.Predicate = unEscape(data[predicate:p], isQuoted, isEscaped)
		isEscaped, isQuoted = false, false
	}

	action SetObject {
		if object < 0 {
			panic("unexpected parser state: object start not set")
		}
		q.Object = unEscape(data[object:p], isQuoted, isEscaped)
		isEscaped, isQuoted = false, false
	}

	action SetLabel {
		if label < 0 {
			panic("unexpected parser state: label start not set")
		}
		q.Provenance = unEscape(data[label:p], isQuoted, isEscaped)
		isEscaped, isQuoted = false, false
	}

	# Return hands the populated quad back to the caller with a nil error.
	action Return {
		return q, nil
	}

	# Comment is a deliberate no-op; comment text is not recorded.
	action Comment {
	}

	# Error reports the rune at the current position. Runes below
	# unicode.MaxASCII are shown quoted only; all others are additionally
	# shown as a \u escape. If the position is already at the end of data
	# the input is reported as incomplete instead.
	action Error {
		if p < len(data) {
			r := data[p]
			if r < unicode.MaxASCII {
				return q, fmt.Errorf("%v: unexpected rune %q at %d", quad.ErrInvalid, r, p)
			}
			return q, fmt.Errorf("%v: unexpected rune %q (\\u%04x) at %d", quad.ErrInvalid, r, r, p)
		}
		return q, quad.ErrIncomplete
	}
}%%
|
||||
141
quad/cquads/cquads.go
Normal file
141
quad/cquads/cquads.go
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package cquads implements parsing of an N-Quads-like line-based syntax
|
||||
// for RDF datasets.
|
||||
//
|
||||
// N-Quad parsing is based on a simplified grammar derived from
|
||||
// the N-Quads grammar defined by http://www.w3.org/TR/n-quads/.
|
||||
//
|
||||
// For a complete definition of the grammar, see cquads.rl.
|
||||
package cquads
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
// Parse returns a valid quad.Quad or a non-nil error. Parse does
|
||||
// handle comments except where the comment placement does not prevent
|
||||
// a complete valid quad.Quad from being defined.
|
||||
func Parse(str string) (*quad.Quad, error) {
|
||||
q, err := parse([]rune(str))
|
||||
return &q, err
|
||||
}
|
||||
|
||||
// Decoder reads simplified N-Quad statements, one per line, from a buffered
// input source.
type Decoder struct {
	// r is the buffered reader the lines are pulled from.
	r *bufio.Reader

	// line accumulates the bytes of the line currently being decoded. It is
	// truncated and reused on every Unmarshal call.
	line []byte
}
|
||||
|
||||
// NewDecoder returns an N-Quad decoder that takes its input from the
|
||||
// provided io.Reader.
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
return &Decoder{r: bufio.NewReader(r)}
|
||||
}
|
||||
|
||||
// Unmarshal returns the next valid N-Quad as a quad.Quad, or an error.
|
||||
func (dec *Decoder) Unmarshal() (*quad.Quad, error) {
|
||||
dec.line = dec.line[:0]
|
||||
var line []byte
|
||||
for {
|
||||
for {
|
||||
l, pre, err := dec.r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dec.line = append(dec.line, l...)
|
||||
if !pre {
|
||||
break
|
||||
}
|
||||
}
|
||||
if line = bytes.TrimSpace(dec.line); len(line) != 0 && line[0] != '#' {
|
||||
break
|
||||
}
|
||||
dec.line = dec.line[:0]
|
||||
}
|
||||
triple, err := Parse(string(line))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q: %v", dec.line, err)
|
||||
}
|
||||
if triple == nil {
|
||||
return dec.Unmarshal()
|
||||
}
|
||||
return triple, nil
|
||||
}
|
||||
|
||||
// unEscape converts the raw runes of a scanned term into its string value.
//
// If isQuoted is true the first and last runes (the surrounding quotes) are
// dropped. If isEscaped is false the remaining runes are returned verbatim.
// Otherwise backslash escapes are decoded: the single-character escapes
// \t \b \n \r \f \" \' \\ and the numeric escapes \uXXXX and \UXXXXXXXX.
//
// The input is expected to have been validated by the parser already, so hex
// digits that cannot be parsed are treated as an internal error and cause a
// panic. A well-formed numeric escape that does not name a valid Unicode code
// point (for example \UFFFFFFFF) is decoded as U+FFFD instead of panicking.
func unEscape(r []rune, isQuoted, isEscaped bool) string {
	const (
		maxRune     = 0x10FFFF // largest valid Unicode code point
		replacement = 0xFFFD   // substituted for out-of-range escapes
	)

	if isQuoted {
		r = r[1 : len(r)-1]
	}
	if !isEscaped {
		return string(r)
	}

	buf := bytes.NewBuffer(make([]byte, 0, len(r)))

	// writeHex decodes the hex digits of a numeric escape and appends the
	// rune they denote.
	writeHex := func(digits []rune) {
		// Parse as unsigned: an eight digit escape with the top bit set
		// would otherwise be rejected as out of range for a signed 32 bit
		// value and crash the parser on syntactically valid input.
		rc, err := strconv.ParseUint(string(digits), 16, 32)
		if err != nil {
			panic(fmt.Errorf("internal parser error: %v", err))
		}
		if rc > maxRune {
			rc = replacement
		}
		buf.WriteRune(rune(rc))
	}

	for i := 0; i < len(r); i++ {
		if r[i] != '\\' {
			buf.WriteRune(r[i])
			continue
		}

		i++ // step onto the character that selects the escape
		var c byte
		switch r[i] {
		case 't':
			c = '\t'
		case 'b':
			c = '\b'
		case 'n':
			c = '\n'
		case 'r':
			c = '\r'
		case 'f':
			c = '\f'
		case '"':
			c = '"'
		case '\'':
			c = '\''
		case '\\':
			c = '\\'
		case 'u':
			writeHex(r[i+1 : i+5])
			i += 4 // leave i on the last hex digit; the loop advances past it
			continue
		case 'U':
			writeHex(r[i+1 : i+9])
			i += 8 // leave i on the last hex digit; the loop advances past it
			continue
		}
		buf.WriteByte(c)
	}

	return buf.String()
}
|
||||
106
quad/cquads/cquads.rl
Normal file
106
quad/cquads/cquads.rl
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Ragel grammar definition derived from http://www.w3.org/TR/n-quads/#sec-grammar.
|
||||
|
||||
%%{
	machine quads;

	# The machine operates on runes rather than bytes.
	alphtype rune;

	# Character classes used to build blank node labels.
	PN_CHARS_BASE = [A-Za-z]
		| 0x00c0 .. 0x00d6 | 0x00d8 .. 0x00f6 | 0x00f8 .. 0x02ff
		| 0x0370 .. 0x037d | 0x037f .. 0x1fff
		| 0x200c .. 0x200d | 0x2070 .. 0x218f
		| 0x2c00 .. 0x2fef | 0x3001 .. 0xd7ff
		| 0xf900 .. 0xfdcf | 0xfdf0 .. 0xfffd
		| 0x10000 .. 0xeffff
		;

	PN_CHARS_U = PN_CHARS_BASE | '_' | ':' ;

	PN_CHARS = PN_CHARS_U
		| '-' | [0-9] | 0xb7
		| 0x0300 .. 0x036f | 0x203f .. 0x2040
		;

	# Escape sequences. Leaving either of them fires the Escape action.
	ECHAR = ('\\' [tbnrf"'\\]) %Escape ;

	UCHAR = ('\\u' xdigit {4}
		| '\\U' xdigit {8}) %Escape
		;

	BLANK_NODE_LABEL = '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? ;

	# An unquoted term: one or more permitted characters or escapes, minus
	# anything that is exactly the blank node prefix, ends in a full stop,
	# or starts with a hash.
	STRING_LITERAL = (
		  '!' | '#' .. '[' | ']' .. 0x7e | 0x80 .. 0x10ffff
		| ECHAR
		| UCHAR)+ - ('_:' | any* '.' | '#' any*)
		;

	# A double-quoted term. Line feed, carriage return, the double quote and
	# the backslash may only appear inside it in escaped form.
	STRING_LITERAL_QUOTE = '"' (
		  0x00 .. 0x09 | 0x0b .. 0x0c | 0x0e .. '!'
		| '#' .. '[' | ']' .. 0x10ffff
		| ECHAR
		| UCHAR)*
		'"'
		;

	IRIREF = '<' (
		  '!' .. ';' | '=' | '?' .. '[' | ']' | '_' | 'a' .. 'z' | '~'
		| 0x80 .. 0x10ffff
		| UCHAR)*
		'>'
		;

	LANGTAG = '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;

	whitespace = [ \t] ;

	# A literal is an unquoted term, a plain quoted term (which fires Quote
	# on leaving), or a quoted term followed by a datatype IRI or a
	# language tag.
	literal = STRING_LITERAL
		| STRING_LITERAL_QUOTE % Quote
		| STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)
		;

	# Terms permitted in each position of a statement.
	subject = (literal | BLANK_NODE_LABEL) ;
	predicate = literal ;
	object = (literal | BLANK_NODE_LABEL) ;
	graphLabel = (literal | BLANK_NODE_LABEL) ;

	# A statement is subject, predicate and object separated by white space,
	# an optional graph label, the terminating full stop and an optional
	# trailing comment.
	statement := (
		whitespace* subject >StartSubject %SetSubject
		whitespace+ predicate >StartPredicate %SetPredicate
		whitespace+ object >StartObject %SetObject
		(whitespace+ graphLabel >StartLabel %SetLabel)?
		whitespace* '.' whitespace* ('#' any*)? >Comment
	) %Return @!Error ;
}%%
|
||||
782
quad/cquads/cquads_test.go
Normal file
782
quad/cquads/cquads_test.go
Normal file
File diff suppressed because it is too large
Load diff
6692
quad/cquads/parse.go
Normal file
6692
quad/cquads/parse.go
Normal file
File diff suppressed because it is too large
Load diff
58
quad/cquads/parse.rl
Normal file
58
quad/cquads/parse.rl
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
// GO SOURCE FILE MACHINE GENERATED BY RAGEL; DO NOT EDIT
|
||||
|
||||
// Copyright 2014 The Cayley Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package cquads
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"github.com/google/cayley/quad"
|
||||
)
|
||||
|
||||
%%{
|
||||
machine quads;
|
||||
|
||||
include "actions.rl";
|
||||
|
||||
include "cquads.rl";
|
||||
|
||||
write data;
|
||||
}%%
|
||||
|
||||
func parse(data []rune) (quad.Quad, error) {
|
||||
var (
|
||||
cs, p int
|
||||
pe = len(data)
|
||||
eof = pe
|
||||
|
||||
subject = -1
|
||||
predicate = -1
|
||||
object = -1
|
||||
label = -1
|
||||
|
||||
isEscaped bool
|
||||
isQuoted bool
|
||||
|
||||
q quad.Quad
|
||||
)
|
||||
|
||||
%%write init;
|
||||
|
||||
%%write exec;
|
||||
|
||||
return quad.Quad{}, quad.ErrInvalid
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue