Add transparent input decompression
This supports gzip and bzip2, detected by the magic number at the start of the input. Trailing-whitespace differences in the documentation are due to an opinionated editor.
This commit is contained in:
parent
abdd649c82
commit
9bf09a5db5
7 changed files with 100 additions and 72 deletions
|
|
@ -72,14 +72,13 @@ cayley> graph.Vertex("dani").Out("follows").All()
|
||||||
For somewhat more interesting data, a sample of 30k movies from Freebase comes in the checkout.
|
For somewhat more interesting data, a sample of 30k movies from Freebase comes in the checkout.
|
||||||
|
|
||||||
```
|
```
|
||||||
gzip -cd 30kmoviedata.nt.gz > 30kmovies.nt
|
./cayley repl --dbpath=30kmoviedata.nt.gz
|
||||||
./cayley repl --dbpath=30kmovies.nt
|
|
||||||
```
|
```
|
||||||
|
|
||||||
To run the web frontend, replace the "repl" command with "http"
|
To run the web frontend, replace the "repl" command with "http"
|
||||||
|
|
||||||
```
|
```
|
||||||
./cayley http --dbpath=30kmovies.nt
|
./cayley http --dbpath=30kmoviedata.nt.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
And visit port 64210 on your machine, commonly [http://localhost:64210](http://localhost:64210)
|
And visit port 64210 on your machine, commonly [http://localhost:64210](http://localhost:64210)
|
||||||
|
|
|
||||||
37
db/load.go
37
db/load.go
|
|
@ -15,6 +15,10 @@
|
||||||
package db
|
package db
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"compress/bzip2"
|
||||||
|
"compress/gzip"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/barakmich/glog"
|
"github.com/barakmich/glog"
|
||||||
|
|
@ -54,7 +58,38 @@ func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
nquads.ReadNQuadsFromReader(c, f)
|
r, err := decompressor(f)
|
||||||
|
if err != nil {
|
||||||
|
glog.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nquads.ReadNQuadsFromReader(c, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Magic numbers expected at the very start of compressed input. decompressor
// compares the leading bytes of its input against these to choose a decoder.
const (
	gzipMagic  = "\x1f\x8b" // leading two bytes checked for gzip
	b2zipMagic = "BZh"      // leading three bytes checked for bzip2
)

// readAtReader is an input that can be consumed sequentially and also
// inspected at an absolute offset. decompressor uses ReadAt to look at the
// header and then hands the same value on as a plain io.Reader.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the first bytes of r and returns a reader yielding
// the uncompressed content: a gzip reader when r starts with the gzip magic
// number, a bzip2 reader when it starts with the bzip2 magic number, and r
// itself otherwise.
//
// Input shorter than the longest magic number (including empty input) is not
// an error; it is matched against whatever bytes are available and normally
// falls through to being returned unchanged. Any ReadAt failure other than
// io.EOF, and any error from gzip.NewReader, is returned to the caller with a
// nil reader.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	// The header is sniffed with ReadAt rather than Read so that the bytes are
	// still there for the decoder (or the caller) to consume from r afterwards.
	n, err := r.ReadAt(buf[:], 0)
	// ReadAt returns io.EOF whenever it cannot fill the whole buffer, so EOF
	// here only means the input is shorter than three bytes.
	if err != nil && err != io.EOF {
		return nil, err
	}
	head := buf[:n]

	switch {
	case bytes.HasPrefix(head, []byte(gzipMagic)):
		return gzip.NewReader(r)
	case bytes.HasPrefix(head, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
|
||||||
|
|
||||||
func LoadTriplesInto(tChan chan *graph.Triple, ts graph.TripleStore, loadSize int) {
|
func LoadTriplesInto(tChan chan *graph.Triple, ts graph.TripleStore, loadSize int) {
|
||||||
|
|
|
||||||
|
|
@ -25,22 +25,16 @@ You can repeat the `--db` and `--dbpath` flags from here forward instead of the
|
||||||
|
|
||||||
### Load Data Into A Graph
|
### Load Data Into A Graph
|
||||||
|
|
||||||
Let's extract the sample data, a couple hundred thousand movie triples, that comes in the checkout:
|
First we load the data.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
zcat 30kmoviedatauniq.n3.gz > 30k.n3
|
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz
|
||||||
```
|
|
||||||
|
|
||||||
Then, we can load the data.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3
|
|
||||||
```
|
```
|
||||||
|
|
||||||
And wait. It will load. If you'd like to watch it load, you can run
|
And wait. It will load. If you'd like to watch it load, you can run
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3 --alsologtostderr
|
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz --alsologtostderr
|
||||||
```
|
```
|
||||||
|
|
||||||
And watch the log output go by.
|
And watch the log output go by.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue