// Semantic actions for the "quads" Ragel machine (nquads/actions.rl).
//
// The actions are expanded inside the generated parse function and use its
// variables: data, p, the per-term start offsets (subject, predicate, object,
// label), the isEscaped flag and the triple being built.

%%{
	machine quads;

	# Escape records that the term being scanned contains an ECHAR or UCHAR
	# sequence, so the Set* action for that term passes isEscaped=true to
	# unEscape and the sequence is decoded.
	action Escape {
		isEscaped = true
	}

	# The Start* actions remember the offset in data of the first rune of a
	# term.
	action StartSubject {
		subject = p
	}

	action StartPredicate {
		predicate = p
	}

	action StartObject {
		object = p
	}

	action StartLabel {
		label = p
	}

	# The Set* actions run when a term ends at p. Each one stores the text
	# data[start:p] (escape-decoded when isEscaped is set) on the triple and
	# then clears isEscaped for the next term. The start offsets are
	# initialised to -1 by parse, so a negative value means the matching
	# Start* action never ran; the actions treat that as a broken machine
	# and panic.
	action SetSubject {
		if subject < 0 {
			panic("unexpected parser state: subject start not set")
		}
		triple.Subject = unEscape(data[subject:p], isEscaped)
		isEscaped = false
	}

	action SetPredicate {
		if predicate < 0 {
			panic("unexpected parser state: predicate start not set")
		}
		triple.Predicate = unEscape(data[predicate:p], isEscaped)
		isEscaped = false
	}

	action SetObject {
		if object < 0 {
			panic("unexpected parser state: object start not set")
		}
		triple.Object = unEscape(data[object:p], isEscaped)
		isEscaped = false
	}

	# The optional fourth term (graph label) is stored as the provenance of
	# the triple.
	action SetLabel {
		if label < 0 {
			panic("unexpected parser state: label start not set")
		}
		triple.Provenance = unEscape(data[label:p], isEscaped)
		isEscaped = false
	}

	# Return hands the completed triple back to the caller of parse.
	action Return {
		return triple, nil
	}

	# Comment is attached to the trailing comment of a statement and is
	# intentionally empty: the comment text is discarded.
	action Comment {
	}

	# Error reports a failed parse. When p still points inside data the rune
	# at p is the one that could not be consumed and is reported together
	# with its offset; otherwise the input ended before the statement was
	# complete and ErrIncomplete is returned.
	action Error {
		if p < len(data) {
			return graph.Triple{}, fmt.Errorf("%v: unexpected rune %q at %d", ErrInvalid, data[p], p)
		}
		return graph.Triple{}, ErrIncomplete
	}
}%%

// Ragel grammar definition derived from http://www.w3.org/TR/n-quads/#sec-grammar.
// (nquads/nquads.rl)

%%{
	machine quads;

	# The machine runs over decoded runes rather than bytes, so the ranges
	# below are Unicode code points.
	alphtype rune;

	# The final range is #x10000-#xEFFFF in the N-Quads grammar; it was
	# previously written as 0x1efff, which cut off most supplementary
	# planes.
	PN_CHARS_BASE         = [A-Za-z]
	                      | 0x00c0 .. 0x00d6
	                      | 0x00d8 .. 0x00f6
	                      | 0x00f8 .. 0x02ff
	                      | 0x0370 .. 0x037d
	                      | 0x037f .. 0x1fff
	                      | 0x200c .. 0x200d
	                      | 0x2070 .. 0x218f
	                      | 0x2c00 .. 0x2fef
	                      | 0x3001 .. 0xd7ff
	                      | 0xf900 .. 0xfdcf
	                      | 0xfdf0 .. 0xfffd
	                      | 0x10000 .. 0xeffff
	                      ;

	PN_CHARS_U            = PN_CHARS_BASE | '_' | ':' ;

	PN_CHARS              = PN_CHARS_U
	                      | '-'
	                      | [0-9]
	                      | 0xb7
	                      | 0x0300 .. 0x036f
	                      | 0x203f .. 0x2040
	                      ;

	# Both escape forms raise the Escape action on leaving, so the term
	# that contains them is decoded by unEscape.
	ECHAR                 = ('\\' [tbnrf"'\\]) %Escape ;

	UCHAR                 = ('\\u' xdigit {4}
	                      | '\\U' xdigit {8}) %Escape
	                      ;

	BLANK_NODE_LABEL      = '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? ;

	# The grammar allows every character except the double quote, the
	# backslash, LF (0x0a) and CR (0x0d) inside a literal. The last
	# alternative range admits DEL and all non-ASCII runes, which were
	# previously rejected.
	STRING_LITERAL_QUOTE  = '"' (
	                          0x00 .. 0x09
	                        | 0x0b .. 0x0c
	                        | 0x0e .. '!'
	                        | '#' .. '['
	                        | ']' .. '~'
	                        | 0x7f .. 0x10ffff
	                        | ECHAR
	                        | UCHAR)*
	                        '"'
	                      ;

	# As above, runes from DEL upwards are now accepted inside an IRI.
	# NOTE(review): the first range still admits the double quote (0x22),
	# which the N-Quads IRIREF production excludes; tightening it would
	# reject input that is accepted today, so it is left for a separate
	# change.
	IRIREF                = '<' (
	                          '!' .. ';'
	                        | '='
	                        | '?' .. '['
	                        | ']'
	                        | '_'
	                        | 'a' .. 'z'
	                        | '~'
	                        | 0x7f .. 0x10ffff
	                        | UCHAR)*
	                        '>'
	                      ;

	LANGTAG               = '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;

	whitespace            = [ \t] ;

	literal               = STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)? ;

	subject               = IRIREF | BLANK_NODE_LABEL ;
	predicate             = IRIREF ;
	object                = IRIREF | BLANK_NODE_LABEL | literal ;
	graphLabel            = IRIREF | BLANK_NODE_LABEL ;

	# One statement per input: three mandatory terms, an optional graph
	# label, the terminating dot and an optional trailing comment. Each
	# term records its start offset on entry and is stored on the triple
	# on exit. Return runs when the whole statement has matched; Error is
	# installed as the error action of every non-final state.
	statement := (
		whitespace*  subject    >StartSubject   %SetSubject
		whitespace+  predicate  >StartPredicate %SetPredicate
		whitespace+  object     >StartObject    %SetObject
		(whitespace+ graphLabel >StartLabel     %SetLabel)?
		whitespace*  '.' whitespace* ('#' any*)? >Comment
	) %Return @!Error ;
}%%
// unEscape converts the runes of a single N-Quads term to a string,
// decoding escape sequences when isEscaped is true.
//
// When isEscaped is false, r is returned as a string unchanged and
// backslashes are not interpreted. Otherwise the single-character escapes
// \t \b \n \r \f \" \' \\ and the numeric escapes \uXXXX and \UXXXXXXXX are
// replaced by the characters they denote. A numeric escape that does not
// name a Unicode scalar value (above U+10FFFF, or a surrogate half) decodes
// to U+FFFD instead of aborting, because the grammar accepts any eight hex
// digits after \U.
//
// The grammar only lets well-formed escapes through, so a dangling
// backslash, an unknown escape letter or a truncated numeric escape means
// the parser itself is broken; those cases panic with an
// "internal parser error".
func unEscape(r []rune, isEscaped bool) string {
	if !isEscaped {
		return string(r)
	}

	// The decoded form is never longer in runes than the input, so len(r)
	// is a reasonable starting capacity.
	buf := bytes.NewBuffer(make([]byte, 0, len(r)))

	for i := 0; i < len(r); i++ {
		if r[i] != '\\' {
			buf.WriteRune(r[i])
			continue
		}

		// Step onto the rune that selects the kind of escape.
		i++
		if i >= len(r) {
			panic(fmt.Errorf("internal parser error: dangling backslash at end of term"))
		}

		switch r[i] {
		case 't':
			buf.WriteByte('\t')
		case 'b':
			buf.WriteByte('\b')
		case 'n':
			buf.WriteByte('\n')
		case 'r':
			buf.WriteByte('\r')
		case 'f':
			buf.WriteByte('\f')
		case '"':
			buf.WriteByte('"')
		case '\'':
			buf.WriteByte('\'')
		case '\\':
			buf.WriteByte('\\')
		case 'u':
			buf.WriteRune(decodeHexRune(r[i+1:], 4))
			i += 4 // skip the digits; the loop increment skips past the last one
		case 'U':
			buf.WriteRune(decodeHexRune(r[i+1:], 8))
			i += 8
		default:
			panic(fmt.Errorf("internal parser error: unknown escape sequence \\%c", r[i]))
		}
	}

	return buf.String()
}

// decodeHexRune interprets the first n runes of digits as a hexadecimal code
// point. Values that are not Unicode scalar values yield U+FFFD. It panics if
// fewer than n runes are available or if they are not hexadecimal digits.
func decodeHexRune(digits []rune, n int) rune {
	const (
		maxCodePoint    = 0x10FFFF
		surrogateMin    = 0xD800
		surrogateMax    = 0xDFFF
		replacementChar = '\uFFFD'
	)

	if len(digits) < n {
		panic(fmt.Errorf("internal parser error: escape needs %d hex digits, found %d", n, len(digits)))
	}

	// Parse as unsigned: eight hex digits can exceed the int32 range, which
	// made the signed parse fail on input such as \UFFFFFFFF.
	v, err := strconv.ParseUint(string(digits[:n]), 16, 32)
	if err != nil {
		panic(fmt.Errorf("internal parser error: %v", err))
	}

	if v > maxCodePoint || (surrogateMin <= v && v <= surrogateMax) {
		return replacementChar
	}
	return rune(v)
}