Merge branch 'master' into parse
Conflicts: db/load.go
This commit is contained in:
commit
e6ed23ef7c
11 changed files with 187 additions and 78 deletions
|
|
@ -17,5 +17,5 @@ install:
|
|||
- go get github.com/syndtr/goleveldb/leveldb/iterator
|
||||
- go get github.com/syndtr/goleveldb/leveldb/opt
|
||||
- go get github.com/syndtr/goleveldb/leveldb/util
|
||||
- go get labix.org/v2/mgo
|
||||
- go get labix.org/v2/mgo/bson
|
||||
- go get gopkg.in/mgo.v2
|
||||
- go get gopkg.in/mgo.v2/bson
|
||||
|
|
|
|||
|
|
@ -72,14 +72,13 @@ cayley> graph.Vertex("dani").Out("follows").All()
|
|||
For somewhat more interesting data, a sample of 30k movies from Freebase comes in the checkout.
|
||||
|
||||
```
|
||||
gzip -cd 30kmoviedata.nt.gz > 30kmovies.nt
|
||||
./cayley repl --dbpath=30kmovies.nt
|
||||
./cayley repl --dbpath=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
To run the web frontend, replace the "repl" command with "http"
|
||||
|
||||
```
|
||||
./cayley http --dbpath=30kmovies.nt
|
||||
./cayley http --dbpath=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
And visit port 64210 on your machine, commonly [http://localhost:64210](http://localhost:64210)
|
||||
|
|
|
|||
37
db/load.go
37
db/load.go
|
|
@ -15,10 +15,14 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
"github.com/google/cayley/config"
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/nquads"
|
||||
|
|
@ -31,7 +35,12 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
|
|||
}
|
||||
defer f.Close()
|
||||
|
||||
dec := nquads.NewDecoder(f)
|
||||
r, err := decompressor(f)
|
||||
if err != nil {
|
||||
glog.Fatalln(err)
|
||||
}
|
||||
|
||||
dec := nquads.NewDecoder(r)
|
||||
|
||||
bulker, canBulk := ts.(graph.BulkLoader)
|
||||
if canBulk {
|
||||
|
|
@ -66,3 +75,29 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Magic byte sequences that identify a compressed input when they appear at
// the very start of the data.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// readAtReader is the input type accepted by decompressor. ReadAt is used to
// peek at the leading bytes; the data itself is then consumed through Read.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the first bytes of r and returns a reader that yields
// the uncompressed content.
//
// Input starting with the gzip magic is wrapped in a gzip reader, input
// starting with the bzip2 magic is wrapped in a bzip2 reader, and anything
// else (including input too short to hold a magic number) is returned as is.
//
// A non-nil error is returned when the leading bytes cannot be read or when
// the gzip header is invalid; in that case the returned reader is nil.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	n, err := r.ReadAt(buf[:], 0)
	// ReadAt reports io.EOF when fewer than len(buf) bytes are available.
	// That only means the input is short, not that it is unreadable, so the
	// bytes that were read are still checked against the magic numbers.
	if err != nil && err != io.EOF {
		return nil, err
	}
	head := buf[:n]

	switch {
	case bytes.HasPrefix(head, []byte(gzipMagic)):
		zr, err := gzip.NewReader(r)
		if err != nil {
			// Return a literal nil so callers never receive an interface
			// holding a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(head, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
|
||||
|
|
|
|||
81
db/load_test.go
Normal file
81
db/load_test.go
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// testDecompressor is the table driving TestDecompressor. Each case names an
// input, the error expected from decompressor itself (err), the bytes
// expected from the first Read of the returned reader (expect) and the error
// expected from that Read (readErr). Fields left out keep their zero value.
var testDecompressor = []struct {
	message string
	input   []byte
	expect  []byte
	err     error
	readErr error
}{
	{
		message: "text input",
		input:   []byte("cayley data\n"),
		expect:  []byte("cayley data\n"),
	},
	{
		message: "gzip input",
		input: []byte{
			0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
			0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
		},
		expect: []byte("cayley data\n"),
	},
	{
		message: "bzip2 input",
		input: []byte{
			0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
			0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
			0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
			0xa9, 0x7c, 0x78, 0x80,
		},
		expect: []byte("cayley data\n"),
	},
	{
		// gzip magic followed by plain text instead of a valid header.
		message: "bad gzip input",
		input:   []byte("\x1f\x8bcayley data\n"),
		err:     gzip.ErrHeader,
	},
	{
		// bzip2 magic followed by plain text instead of a level digit.
		message: "bad bzip2 input",
		input:   []byte("BZhcayley data\n"),
		readErr: bzip2.StructuralError("invalid compression level"),
	},
}
|
||||
|
||||
func TestDecompressor(t *testing.T) {
|
||||
for _, test := range testDecompressor {
|
||||
buf := bytes.NewReader(test.input)
|
||||
r, err := decompressor(buf)
|
||||
if err != test.err {
|
||||
t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
p := make([]byte, len(test.expect)*2)
|
||||
n, err := r.Read(p)
|
||||
if err != test.readErr {
|
||||
t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err)
|
||||
}
|
||||
if bytes.Compare(p[:n], test.expect) != 0 {
|
||||
t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,22 +25,16 @@ You can repeat the `--db` and `--dbpath` flags from here forward instead of the
|
|||
|
||||
### Load Data Into A Graph
|
||||
|
||||
Let's extract the sample data, a couple hundred thousand movie triples, that comes in the checkout:
|
||||
First we load the data.
|
||||
|
||||
```bash
|
||||
zcat 30kmoviedatauniq.n3.gz > 30k.n3
|
||||
```
|
||||
|
||||
Then, we can load the data.
|
||||
|
||||
```bash
|
||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3
|
||||
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz
|
||||
```
|
||||
|
||||
And wait. It will load. If you'd like to watch it load, you can run
|
||||
|
||||
```bash
|
||||
./cayley load --config=cayley.cfg.overview --triples=30k.n3 --alsologtostderr
|
||||
./cayley load --config=cayley.cfg.overview --triples=30kmoviedata.nt.gz --alsologtostderr
|
||||
```
|
||||
|
||||
And watch the log output go by.
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"gopkg.in/mgo.v2"
|
||||
"gopkg.in/mgo.v2/bson"
|
||||
|
||||
"github.com/google/cayley/graph"
|
||||
"github.com/google/cayley/graph/iterator"
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ import (
|
|||
"hash"
|
||||
"log"
|
||||
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"gopkg.in/mgo.v2"
|
||||
"gopkg.in/mgo.v2/bson"
|
||||
|
||||
"github.com/barakmich/glog"
|
||||
"github.com/google/cayley/graph"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue