// unEscape converts r to a string.
//
// When isEscaped is false the runes are returned unchanged. Otherwise every
// backslash introduces an escape sequence that is decoded as follows:
//
//	\t \b \n \r \f \" \' \\   the corresponding single character
//	\uXXXX                    the code point given by four hex digits
//	\UXXXXXXXX                the code point given by eight hex digits
//
// Hex escapes that do not name a valid Unicode code point (values above
// U+10FFFF, or surrogate halves) decode to U+FFFD instead of aborting.
//
// unEscape has no error return. Malformed input (a dangling backslash, an
// unknown escape selector, or a hex escape that is truncated or contains
// non-hex characters) panics with an error whose text starts with
// "internal parser error".
// NOTE(review): this presumes callers only set isEscaped for input that was
// already validated by the parser; confirm against the call sites.
func unEscape(r []rune, isEscaped bool) string {
	if !isEscaped {
		return string(r)
	}

	buf := bytes.NewBuffer(make([]byte, 0, len(r)))

	for i := 0; i < len(r); i++ {
		if r[i] != '\\' {
			buf.WriteRune(r[i])
			continue
		}

		// Step onto the escape selector; a backslash must never be the last
		// rune of the input.
		i++
		if i >= len(r) {
			panic(fmt.Errorf("internal parser error: dangling backslash at end of %q", string(r)))
		}

		switch r[i] {
		case 't':
			buf.WriteByte('\t')
		case 'b':
			buf.WriteByte('\b')
		case 'n':
			buf.WriteByte('\n')
		case 'r':
			buf.WriteByte('\r')
		case 'f':
			buf.WriteByte('\f')
		case '"':
			buf.WriteByte('"')
		case '\'':
			buf.WriteByte('\'')
		case '\\':
			buf.WriteByte('\\')
		case 'u':
			buf.WriteRune(unEscapeHex(r, i+1, 4))
			i += 4 // leave i on the last hex digit; the loop advances past it
		case 'U':
			buf.WriteRune(unEscapeHex(r, i+1, 8))
			i += 8
		default:
			// Previously an unknown selector silently produced a NUL byte.
			panic(fmt.Errorf("internal parser error: unknown escape sequence \\%c in %q", r[i], string(r)))
		}
	}

	return buf.String()
}

// unEscapeHex decodes the width hex digits of r beginning at index start and
// returns the code point they denote, or U+FFFD when the value lies above
// U+10FFFF. It panics if fewer than width runes remain or if any of them is
// not a hex digit.
func unEscapeHex(r []rune, start, width int) rune {
	const maxCodePoint = 0x10FFFF

	// Check against len(r), not cap(r): slicing a short input that happens to
	// have spare capacity would otherwise read stale runes past its end.
	end := start + width
	if end > len(r) {
		panic(fmt.Errorf("internal parser error: truncated unicode escape in %q", string(r)))
	}

	// ParseUint rejects sign prefixes, and 32 bits always hold eight hex
	// digits, so the only possible failure is a non-hex character.
	code, err := strconv.ParseUint(string(r[start:end]), 16, 32)
	if err != nil {
		panic(fmt.Errorf("internal parser error: %v", err))
	}
	if code > maxCodePoint {
		return '\uFFFD'
	}
	return rune(code)
}

// escapeSequenceTests pairs escaped inputs with the text unEscape is expected
// to produce for them (test data from nquads_test.go).
var escapeSequenceTests = []struct {
	input  string
	expect string
}{
	{input: `\t`, expect: "\t"},
	{input: `\b`, expect: "\b"},
	{input: `\n`, expect: "\n"},
	{input: `\r`, expect: "\r"},
	{input: `\f`, expect: "\f"},
	{input: `\\`, expect: "\\"},
	{input: `\u00b7`, expect: "·"},
	{input: `\U000000b7`, expect: "·"},

	{input: `\t\u00b7`, expect: "\t·"},
	{input: `\b\U000000b7`, expect: "\b·"},
	{input: `\u00b7\n`, expect: "·\n"},
	{input: `\U000000b7\r`, expect: "·\r"},
	{input: `\u00b7\f\U000000b7`, expect: "·\f·"},
	{input: `\U000000b7\\\u00b7`, expect: "·\\·"},

	// Quote escapes and code points outside the Unicode range.
	{input: `\"\'`, expect: "\"'"},
	{input: `\UFFFFFFFF`, expect: "\uFFFD"},
}

// TestUnescape checks unEscape against every entry of escapeSequenceTests.
func TestUnescape(t *testing.T) {
	for _, test := range escapeSequenceTests {
		got := unEscape([]rune(test.input), true)
		if got != test.expect {
			t.Errorf("Failed to properly unescape %q, got:%q expect:%q", test.input, got, test.expect)
		}
	}
}