Merge pull request #233 from barakmich/decompressor

fix decompressing for http
This commit is contained in:
Barak Michener 2015-04-16 19:11:24 -04:00
commit 6764ea0295
5 changed files with 151 additions and 105 deletions

View file

@ -17,10 +17,6 @@
package main
import (
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
"flag"
"fmt"
"io"
@ -37,6 +33,7 @@ import (
"github.com/google/cayley/db"
"github.com/google/cayley/graph"
"github.com/google/cayley/http"
"github.com/google/cayley/internal"
"github.com/google/cayley/quad"
"github.com/google/cayley/quad/cquads"
"github.com/google/cayley/quad/nquads"
@ -290,8 +287,11 @@ func decompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string
r = res.Body
}
r, err = decompressor(r)
r, err = internal.Decompressor(r)
if err != nil {
if err == io.EOF {
return nil
}
return err
}
@ -311,24 +311,3 @@ func decompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string
return db.Load(qw, cfg, dec)
}
// Magic byte prefixes used to recognise compressed input streams.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// decompressor inspects the leading bytes of r and returns a reader that
// yields the decoded stream: a gzip reader when the input starts with the
// gzip magic, a bzip2 reader when it starts with "BZh", and the buffered
// input unchanged otherwise.
//
// An input with no bytes at all is reported as (nil, io.EOF) so callers can
// treat it as "nothing to load". Inputs shorter than three bytes are still
// classified from the bytes that are available instead of being reported as
// EOF. On any error the returned reader is a nil interface value.
func decompressor(r io.Reader) (io.Reader, error) {
	br := bufio.NewReader(r)
	// Peek does not advance the reader, so the sniffed bytes are still
	// delivered to whichever reader is returned below.
	buf, err := br.Peek(3)
	if err != nil && (err != io.EOF || len(buf) == 0) {
		return nil, err
	}
	switch {
	case bytes.HasPrefix(buf, []byte(gzipMagic)):
		zr, err := gzip.NewReader(br)
		if err != nil {
			// Return an untyped nil so callers never see a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(buf, []byte(b2zipMagic)):
		return bzip2.NewReader(br), nil
	default:
		return br, nil
	}
}

View file

@ -15,16 +15,12 @@
package main
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"flag"
"fmt"
"io"
"os"
"reflect"
"sort"
"strings"
"sync"
"testing"
"time"
@ -650,75 +646,3 @@ type reader struct {
// Read fills p by delegating to the wrapped r.r and passes its byte count
// and error straight back to the caller.
func (r reader) Read(p []byte) (int, error) {
	n, err := r.r.Read(p)
	return n, err
}
// testDecompressor is the case table for TestDecompressor. Fields left out
// of an entry keep their zero value (nil).
var testDecompressor = []struct {
	message string    // case label used in failure output
	input   io.Reader // raw stream handed to the decompressor
	expect  []byte    // bytes the first Read is expected to produce
	err     error     // error expected from constructing the reader
	readErr error     // error expected from the first Read
}{
	{
		message: "text input",
		input:   strings.NewReader("cayley data\n"),
		expect:  []byte("cayley data\n"),
	},
	{
		message: "gzip input",
		input: bytes.NewReader([]byte{
			0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53,
			0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
			0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00,
			0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
		}),
		expect: []byte("cayley data\n"),
	},
	{
		message: "bzip2 input",
		input: bytes.NewReader([]byte{
			0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26,
			0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
			0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e,
			0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
			0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26,
			0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
			0xa9, 0x7c, 0x78, 0x80,
		}),
		expect: []byte("cayley data\n"),
	},
	{
		// gzip magic followed by a non-gzip header.
		message: "bad gzip input",
		input:   strings.NewReader("\x1f\x8bcayley data\n"),
		err:     gzip.ErrHeader,
	},
	{
		// bzip2 magic followed by an invalid level byte; the table expects
		// the failure on the first Read rather than at construction.
		message: "bad bzip2 input",
		input:   strings.NewReader("\x42\x5a\x68cayley data\n"),
		readErr: bzip2.StructuralError("invalid compression level"),
	},
}
// TestDecompressor feeds every entry of testDecompressor through
// decompressor, checks the construction error, then performs a single Read
// and checks both the read error and the bytes produced.
func TestDecompressor(t *testing.T) {
	for _, test := range testDecompressor {
		r, err := decompressor(test.input)
		if err != test.err {
			t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
		}
		if err != nil {
			// Construction failed as expected; there is no reader to exercise.
			continue
		}

		// NOTE(review): the buffer is sized at twice the expected output,
		// presumably so that surplus bytes would show up in the comparison.
		p := make([]byte, len(test.expect)*2)
		n, err := r.Read(p)
		if err != test.readErr {
			// Report the read-stage expectation, not the construction one.
			t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.readErr)
		}
		if !bytes.Equal(p[:n], test.expect) {
			t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
		}
	}
}

View file

@ -25,6 +25,7 @@ import (
"github.com/barakmich/glog"
"github.com/julienschmidt/httprouter"
"github.com/google/cayley/internal"
"github.com/google/cayley/quad"
"github.com/google/cayley/quad/cquads"
)
@ -75,7 +76,6 @@ func (api *API) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
glog.Errorln(err)
return jsonResponse(w, 500, "Couldn't read file: "+err.Error())
}
defer formFile.Close()
blockSize, blockErr := strconv.ParseInt(r.URL.Query().Get("block_size"), 10, 64)
@ -83,8 +83,9 @@ func (api *API) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
blockSize = int64(api.config.LoadSize)
}
quadReader, err := internal.Decompressor(formFile)
// TODO(kortschak) Make this configurable from the web UI.
dec := cquads.NewDecoder(formFile)
dec := cquads.NewDecoder(quadReader)
h, err := api.GetHandleForRequest(r)
if err != nil {
@ -101,7 +102,7 @@ func (api *API) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params
if err == io.EOF {
break
}
panic("what can do this here?") // FIXME(kortschak)
glog.Fatalln("what can do this here?", err) // FIXME(kortschak)
}
block = append(block, t)
n++

46
internal/decompressor.go Normal file
View file

@ -0,0 +1,46 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package internal
import (
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
"io"
)
// Magic byte prefixes used to recognise compressed input streams.
const (
	gzipMagic  = "\x1f\x8b"
	b2zipMagic = "BZh"
)

// Decompressor detects the file type of an io.Reader between
// bzip, gzip, or raw quad file, and returns a reader yielding the decoded
// stream: a gzip reader when the input starts with the gzip magic, a bzip2
// reader when it starts with "BZh", and the buffered input unchanged
// otherwise.
//
// An input with no bytes at all is reported as (nil, io.EOF) so callers can
// treat it as "nothing to load". Inputs shorter than three bytes are still
// classified from the bytes that are available instead of being reported as
// EOF. On any error the returned reader is a nil interface value.
func Decompressor(r io.Reader) (io.Reader, error) {
	br := bufio.NewReader(r)
	// Peek does not advance the reader, so the sniffed bytes are still
	// delivered to whichever reader is returned below.
	buf, err := br.Peek(3)
	if err != nil && (err != io.EOF || len(buf) == 0) {
		return nil, err
	}
	switch {
	case bytes.HasPrefix(buf, []byte(gzipMagic)):
		zr, err := gzip.NewReader(br)
		if err != nil {
			// Return an untyped nil so callers never see a non-nil
			// interface wrapping a nil *gzip.Reader.
			return nil, err
		}
		return zr, nil
	case bytes.HasPrefix(buf, []byte(b2zipMagic)):
		return bzip2.NewReader(br), nil
	default:
		return br, nil
	}
}

View file

@ -0,0 +1,96 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package internal
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"io"
"strings"
"testing"
)
// testDecompressor is the case table for TestDecompressor. Fields left out
// of an entry keep their zero value (nil).
var testDecompressor = []struct {
	message string    // case label used in failure output
	input   io.Reader // raw stream handed to Decompressor
	expect  []byte    // bytes the first Read is expected to produce
	err     error     // error expected from Decompressor itself
	readErr error     // error expected from the first Read
}{
	{
		message: "text input",
		input:   strings.NewReader("cayley data\n"),
		expect:  []byte("cayley data\n"),
	},
	{
		message: "gzip input",
		input: bytes.NewReader([]byte{
			0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53,
			0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad,
			0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00,
			0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00,
		}),
		expect: []byte("cayley data\n"),
	},
	{
		message: "bzip2 input",
		input: bytes.NewReader([]byte{
			0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26,
			0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00,
			0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e,
			0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c,
			0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26,
			0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16,
			0xa9, 0x7c, 0x78, 0x80,
		}),
		expect: []byte("cayley data\n"),
	},
	{
		// gzip magic followed by a non-gzip header.
		message: "bad gzip input",
		input:   strings.NewReader("\x1f\x8bcayley data\n"),
		err:     gzip.ErrHeader,
	},
	{
		// bzip2 magic followed by an invalid level byte; the table expects
		// the failure on the first Read rather than at construction.
		message: "bad bzip2 input",
		input:   strings.NewReader("\x42\x5a\x68cayley data\n"),
		readErr: bzip2.StructuralError("invalid compression level"),
	},
}
// TestDecompressor feeds every entry of testDecompressor through
// Decompressor, checks the construction error, then performs a single Read
// and checks both the read error and the bytes produced.
func TestDecompressor(t *testing.T) {
	for _, test := range testDecompressor {
		r, err := Decompressor(test.input)
		if err != test.err {
			t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err)
		}
		if err != nil {
			// Construction failed as expected; there is no reader to exercise.
			continue
		}

		// NOTE(review): the buffer is sized at twice the expected output,
		// presumably so that surplus bytes would show up in the comparison.
		p := make([]byte, len(test.expect)*2)
		n, err := r.Read(p)
		if err != test.readErr {
			// Report the read-stage expectation, not the construction one.
			t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.readErr)
		}
		if !bytes.Equal(p[:n], test.expect) {
			t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect)
		}
	}
}