cayley/db/load.go
kortschak 41f6d3fd84 Temporarily use cquads only
I intend to make this configurable, but there is tight connection
between db.Load and db.Open that is getting in the way of that.

Testing on data set 30kmoviedata.cq.gz created by doing:

zcat 30kmoviedata.nq.gz | sed 's/[<>]//g' | gzip -c > 30kmoviedata.cq.gz

The following query is successful:

[{
  "type": "/film/film",
  "name": null,
  "/film/film/directed_by": {
    "name": "David Fincher"
  },
  "/film/film/starring": [{
    "/film/performance/actor": {
      "name": null
    }
  }]
}]

TODO: fix up naming for quads and make strict parsing an option.
2014-07-28 21:56:32 +09:30

104 lines
2.1 KiB
Go

// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package db
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"fmt"
"io"
"os"
"github.com/barakmich/glog"
"github.com/google/cayley/config"
"github.com/google/cayley/graph"
"github.com/google/cayley/quad"
"github.com/google/cayley/quad/cquads"
)
func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
f, err := os.Open(path)
if err != nil {
return fmt.Errorf("could not open file %q: %v", path, err)
}
defer f.Close()
r, err := decompressor(f)
if err != nil {
glog.Fatalln(err)
}
dec := cquads.NewDecoder(r)
bulker, canBulk := ts.(graph.BulkLoader)
if canBulk {
err = bulker.BulkLoad(dec)
if err == nil {
return nil
}
if err == graph.ErrCannotBulkLoad {
err = nil
}
}
if err != nil {
return err
}
block := make([]*quad.Quad, 0, cfg.LoadSize)
for {
t, err := dec.Unmarshal()
if err != nil {
if err == io.EOF {
break
}
return err
}
block = append(block, t)
if len(block) == cap(block) {
ts.AddTripleSet(block)
block = block[:0]
}
}
ts.AddTripleSet(block)
return nil
}
const (
gzipMagic = "\x1f\x8b"
b2zipMagic = "BZh"
)
type readAtReader interface {
io.Reader
io.ReaderAt
}
func decompressor(r readAtReader) (io.Reader, error) {
var buf [3]byte
_, err := r.ReadAt(buf[:], 0)
if err != nil {
return nil, err
}
switch {
case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0:
return gzip.NewReader(r)
case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0:
return bzip2.NewReader(r), nil
default:
return r, nil
}
}