Add transparent input decompression

Input compressed with gzip or bzip2 is detected by its magic number and decompressed transparently.

Trailing whitespace differences in documentation are due to an
opinionated editor.
This commit is contained in:
kortschak 2014-07-19 12:49:55 +09:30
parent abdd649c82
commit 9bf09a5db5
7 changed files with 100 additions and 72 deletions

View file

@ -15,6 +15,10 @@
package db
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"io"
"os"
"github.com/barakmich/glog"
@ -54,7 +58,38 @@ func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) {
}
}()
nquads.ReadNQuadsFromReader(c, f)
r, err := decompressor(f)
if err != nil {
glog.Fatalln(err)
}
nquads.ReadNQuadsFromReader(c, r)
}
// Magic numbers found at the very start of a compressed stream.
const (
	gzipMagic  = "\x1f\x8b" // first two bytes of a gzip member header
	b2zipMagic = "BZh"      // first three bytes of a bzip2 stream
)

// readAtReader is a stream that can be read sequentially and also
// inspected at an absolute offset without disturbing the sequential
// read position.
type readAtReader interface {
	io.Reader
	io.ReaderAt
}

// decompressor inspects the leading bytes of r and returns a reader that
// yields the decompressed contents when r starts with a gzip or bzip2
// magic number. Any other input, including input too short to hold a
// magic number, is returned unchanged.
//
// The header is examined with ReadAt so that the reader handed back still
// starts at the beginning of the stream.
//
// On failure the returned reader is nil and the error is the one reported
// by ReadAt or by gzip.NewReader.
func decompressor(r readAtReader) (io.Reader, error) {
	var buf [3]byte
	n, err := r.ReadAt(buf[:], 0)
	// ReadAt reports io.EOF whenever fewer than len(buf) bytes exist. That
	// only means the input is short (possibly empty), not that it is
	// unreadable, so classify it using the bytes that were available.
	if err != nil && err != io.EOF {
		return nil, err
	}
	head := buf[:n]

	switch {
	case bytes.HasPrefix(head, []byte(gzipMagic)):
		zr, err := gzip.NewReader(r)
		if err != nil {
			// Return an untyped nil so callers never see a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(head, []byte(b2zipMagic)):
		return bzip2.NewReader(r), nil
	default:
		return r, nil
	}
}
func LoadTriplesInto(tChan chan *graph.Triple, ts graph.TripleStore, loadSize int) {