Use error returns and interface type for parsing

Fixes issue #72

This change simplifies interactions with parsing N-Quads and makes
reading datasets more robust. Changes made while here also improve
performance:

benchmark           old ns/op     new ns/op     delta
BenchmarkParser     1058          667           -36.96%

We still use string concatenation which I'm not wildly happy about, but
I think this can be left for a later change.

Initial changes towards idiomatic error handling have been made. More
significant changes are needed, but these have subtle design implications
and need to be thought about more.

30kmoviesdata.nt.gz has been altered to properly escape double quotes.
This was done mechanically and with manual curation to pick up
stragglers.
This commit is contained in:
kortschak 2014-07-22 19:55:18 +09:30
parent abdd649c82
commit 0e0e382d2b
11 changed files with 260 additions and 226 deletions

View file

@ -17,6 +17,7 @@ package http
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"strconv"
@ -77,22 +78,32 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
blockSize = int64(api.config.LoadSize)
}
tChan := make(chan *graph.Triple)
go nquads.ReadNQuadsFromReader(tChan, formFile)
tripleblock := make([]*graph.Triple, blockSize)
nTriples := 0
i := int64(0)
for t := range tChan {
tripleblock[i] = t
i++
nTriples++
if i == blockSize {
api.ts.AddTripleSet(tripleblock)
i = 0
dec := nquads.NewDecoder(formFile)
var (
n int
block = make([]*graph.Triple, 0, blockSize)
)
for {
t, err := dec.Unmarshal()
if err != nil {
if err == io.EOF {
break
}
panic("what can do this here?") // FIXME(kortschak)
}
block = append(block, t)
n++
if len(block) == cap(block) {
api.ts.AddTripleSet(block)
block = block[:0]
}
}
api.ts.AddTripleSet(tripleblock[0:i])
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", nTriples)
api.ts.AddTripleSet(block)
fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", n)
return 200
}