Add transparent input decompression
This supports gzip and bzip2 input, detected from the magic number at the start of the file. The trailing-whitespace differences in the documentation are due to an opinionated editor.
This commit is contained in:
parent
abdd649c82
commit
9bf09a5db5
7 changed files with 100 additions and 72 deletions
|
|
@ -72,14 +72,13 @@ cayley> graph.Vertex("dani").Out("follows").All()
|
|||
For somewhat more interesting data, a sample of 30k movies from Freebase comes in the checkout.
|
||||
|
||||
```
|
||||
gzip -cd 30kmoviedata.nt.gz > 30kmovies.nt
|
||||
./cayley repl --dbpath=30kmovies.nt
|
||||
./cayley repl --dbpath=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
To run the web frontend, replace the "repl" command with "http":
|
||||
|
||||
```
|
||||
./cayley http --dbpath=30kmovies.nt
|
||||
./cayley http --dbpath=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
And visit port 64210 on your machine, commonly [http://localhost:64210](http://localhost:64210)
|
||||
|
|
|
|||
37
db/load.go
37
db/load.go
|
|
@ -15,6 +15,10 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
|
|
@ -54,7 +58,38 @@ func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) {
|
|||
}
|
||||
}()
|
||||
|
||||
nquads.ReadNQuadsFromReader(c, f)
|
||||
r, err := decompressor(f)
|
||||
if err != nil {
|
||||
glog.Fatalln(err)
|
||||
}
|
||||
|
||||
nquads.ReadNQuadsFromReader(c, r)
|
||||
}
|
||||
|
||||
// Magic numbers found at the very start of a compressed stream.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// readAtReader is a source that can be peeked at an absolute offset via
// ReadAt and also consumed sequentially via Read.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the leading bytes of r and returns a reader that
// yields the uncompressed contents. Inputs starting with the gzip or bzip2
// magic number are wrapped in the matching decompressing reader; anything
// else is returned unchanged.
//
// Inputs shorter than the longest magic number (including empty input) are
// not an error: they are simply treated as uncompressed data. A nil reader
// is returned whenever the error is non-nil.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	// ReadAt reports io.EOF when fewer than len(buf) bytes are available.
	// That only means the input is short, so keep whatever was read and
	// fail only on real I/O errors.
	n, err := r.ReadAt(buf[:], 0)
	if err != nil && err != io.EOF {
		return nil, err
	}
	head := buf[:n]

	switch {
	case bytes.HasPrefix(head, []byte(gzipMagic)):
		gz, err := gzip.NewReader(r)
		if err != nil {
			// Return an untyped nil so callers never receive a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return gz, nil
	case bytes.HasPrefix(head, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
|
||||
|
||||
func LoadTriplesInto(tChan chan *graph.Triple, ts graph.TripleStore, loadSize int) {
|
||||
|
|
|
|||
|
|
@ -25,22 +25,16 @@ You can repeat the `--db` and `--dbpath` flags from here forward instead of the
|
|||
|
||||
### Load Data Into A Graph
|
||||
|
||||
Let's extract the sample data, a couple hundred thousand movie triples, that comes in the checkout:
|
||||
First we load the data.
|
||||
|
||||
```bash
|
||||
zcat 30kmoviedatauniq.n3.gz > 30k.n3
|
||||
```
|
||||
|
||||
Then, we can load the data.
|
||||
|
||||
```bash
|
||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3
|
||||
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
And wait. It will load. If you'd like to watch it load, you can run
|
||||
|
||||
```bash
|
||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3 --alsologtostderr
|
||||
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz --alsologtostderr
|
||||
```
|
||||
|
||||
And watch the log output go by.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue