App Engine datastore backend implementation, beta version, not optimized. CR: nobody. Tests: unit tests, manual.

This commit is contained in:
= 2014-11-28 13:32:35 +01:00 committed by panamafrancis
parent b123cd673e
commit 5e61e2ecad
3 changed files with 1187 additions and 0 deletions

View file

@ -0,0 +1,322 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build appengine
package gaedatastore
import (
"fmt"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
"appengine/datastore"
"github.com/barakmich/glog"
)
// Iterator pages through the keys of datastore entities of one kind. It is
// built either by NewAllIterator (every entity of the kind) or by NewIterator
// (entities whose field for a given direction equals one node's name).
type Iterator struct {
	// uid is the identifier obtained from iterator.NextUID at construction.
	uid uint64
	// size is the value reported by Size; the constructors take it from the
	// node entity's Size field or from the quad store's NodeSize/Size.
	size int64
	// tags holds the tags attached to this iterator.
	tags graph.Tagger
	// dir is the quad direction used to build the query filter and to pick
	// the hash segment compared in Contains.
	dir quad.Direction
	// qs is the quad store whose context the datastore calls run in.
	qs *QuadStore
	// name is the node name (qs.NameOf) used as the filter value in Next.
	name string
	// isAll is true for iterators created by NewAllIterator.
	isAll bool
	// kind is the datastore kind queried by Next (nodeKind or quadKind).
	kind string
	// hash is the Hash of the token the iterator was created from.
	hash string
	// done marks the iterator as exhausted (or unusable, for the placeholder
	// iterators the constructors return on invalid input).
	done bool
	// buffer holds the string IDs of the keys fetched by the latest query.
	buffer []string
	// offset is the index in buffer of the current result.
	offset int
	// last is the encoded datastore cursor saved after the latest query.
	last string
	// result is the value returned by Result.
	result graph.Value
}
// bufferSize is the limit applied to each datastore query issued by Next and
// the initial capacity of the iterator's key buffer.
var bufferSize = 50
// NewIterator returns an iterator over the entities of kind k (nodeKind or
// quadKind) whose field for direction d equals the name of the node
// identified by val.
//
// val must be a non-nil *Token of kind nodeKind. On any invalid argument, a
// missing context, or a datastore error other than ErrNoSuchEntity, the
// problem is logged and an already-exhausted iterator is returned instead.
//
// The iterator's size is read from the Size field of the node's datastore
// entity; when that entity does not exist the size is left at zero.
func NewIterator(qs *QuadStore, k string, d quad.Direction, val graph.Value) *Iterator {
	// Use the checked form of the assertion: a value of another type (or a
	// nil token) must yield an exhausted iterator rather than a panic.
	t, ok := val.(*Token)
	if !ok || t == nil {
		glog.Error("Token == nil")
		return &Iterator{done: true}
	}
	if t.Kind != nodeKind {
		glog.Error("Cannot create an iterator from a non-node value")
		return &Iterator{done: true}
	}
	if k != nodeKind && k != quadKind {
		glog.Error("Cannot create iterator for unknown kind")
		return &Iterator{done: true}
	}
	if qs == nil || qs.context == nil {
		glog.Error("Cannot create iterator without a valid context")
		return &Iterator{done: true}
	}
	name := qs.NameOf(t)

	// The number of references to this node is held in the nodes entity
	key := qs.createKeyFromToken(t)
	foundNode := new(NodeEntry)
	err := datastore.Get(qs.context, key, foundNode)
	if err != nil && err != datastore.ErrNoSuchEntity {
		glog.Errorf("Error: %v", err)
		return &Iterator{done: true}
	}

	return &Iterator{
		uid:   iterator.NextUID(),
		name:  name,
		dir:   d,
		qs:    qs,
		size:  foundNode.Size,
		isAll: false,
		kind:  k,
		hash:  t.Hash,
		done:  false,
	}
}
// NewAllIterator returns an iterator over every entity of the given kind,
// which must be nodeKind or quadKind. An unknown kind or a quad store without
// a context is logged and answered with an already-exhausted iterator. The
// reported size comes from qs.NodeSize for nodes and qs.Size for quads.
func NewAllIterator(qs *QuadStore, kind string) *Iterator {
	if !(kind == nodeKind || kind == quadKind) {
		glog.Error("Cannot create iterator for an unknown kind")
		return &Iterator{done: true}
	}
	if qs.context == nil {
		glog.Error("Cannot create iterator without a valid context")
		return &Iterator{done: true}
	}

	all := &Iterator{
		qs:    qs,
		dir:   quad.Any,
		isAll: true,
		kind:  kind,
	}
	if kind == nodeKind {
		all.size = qs.NodeSize()
	} else {
		all.size = qs.Size()
	}
	all.uid = iterator.NextUID()
	return all
}
// UID returns the identifier assigned to the iterator when it was created.
func (it *Iterator) UID() uint64 {
	id := it.uid
	return id
}
// Reset rewinds the iterator: the buffered keys, the saved cursor and the
// current result are discarded and the iterator is marked as not done.
func (it *Iterator) Reset() {
	it.buffer, it.offset = nil, 0
	it.last, it.result = "", nil
	it.done = false
}
// Close discards the buffered keys, the saved cursor and the current result,
// and marks the iterator as done so that Next fetches nothing further.
func (it *Iterator) Close() {
	it.buffer, it.offset = nil, 0
	it.last, it.result = "", nil
	it.done = true
}
// Tagger returns a pointer to the iterator's own tagger, so changes made
// through it are visible to the iterator.
func (it *Iterator) Tagger() *graph.Tagger {
	tagger := &it.tags
	return tagger
}
// Contains reports whether v belongs to the set described by the iterator.
//
// An "all" iterator accepts every value and records it as the current result.
// Otherwise v must be a non-nil *Token that is not of kind nodeKind; its Hash
// is treated as a sequence of fixed-width segments (subject, predicate,
// object, label) and the segment for the iterator's direction is compared
// with the iterator's own hash. Values of the wrong type, node tokens and
// hashes too short to hold the segment are logged and reported as not
// contained.
func (it *Iterator) Contains(v graph.Value) bool {
	graph.ContainsLogIn(it, v)
	if it.isAll {
		// The result needs to be set, so when contains is called, the result can be retrieved
		it.result = v
		return graph.ContainsLogOut(it, v, true)
	}

	// Checked assertion: a foreign value type must not panic the query.
	t, ok := v.(*Token)
	if !ok || t == nil {
		glog.Error("Could not cast to token")
		return graph.ContainsLogOut(it, v, false)
	}
	if t.Kind == nodeKind {
		glog.Error("Contains does not work with node values")
		return graph.ContainsLogOut(it, v, false)
	}

	// Contains is for when you want to know that an iterator refers to a quad.
	// Each segment is hashSize*2 characters wide (presumably a hex-encoded
	// hash - TODO confirm against the key construction in the quad store).
	width := it.qs.hashSize * 2
	var segment int
	switch it.dir {
	case quad.Subject:
		segment = 0
	case quad.Predicate:
		segment = 1
	case quad.Object:
		segment = 2
	case quad.Label:
		segment = 3
	}
	start := segment * width
	end := start + width
	if width < 0 || end > len(t.Hash) {
		// Slicing past the end of the hash would panic.
		glog.Errorf("Quad hash is too short: need %d characters, have %d", end, len(t.Hash))
		return graph.ContainsLogOut(it, v, false)
	}
	return graph.ContainsLogOut(it, v, t.Hash[start:end] == it.hash)
}
// TagResults writes the iterator's tags into dst: every regular tag maps to
// the current result, and every fixed tag maps to its fixed value. Fixed tags
// are written last, so they win when a name appears in both sets.
func (it *Iterator) TagResults(dst map[string]graph.Value) {
	names := it.tags.Tags()
	for i := range names {
		dst[names[i]] = it.Result()
	}
	fixed := it.tags.Fixed()
	for name := range fixed {
		dst[name] = fixed[name]
	}
}
// Clone returns a fresh iterator of the same flavour (all / per-node) over
// the same quad store, kind and direction, carrying a copy of the tags.
func (it *Iterator) Clone() graph.Iterator {
	if it.isAll {
		m := NewAllIterator(it.qs, it.kind)
		m.tags.CopyFrom(it)
		return m
	}
	// NewIterator requires a node *Token; handing it the bare hash string
	// would fail its type assertion. Rebuild the node token from the stored
	// hash instead.
	t := &Token{Kind: nodeKind, Hash: it.hash}
	m := NewIterator(it.qs, it.kind, it.dir, t)
	m.tags.CopyFrom(it)
	return m
}
// NextPath always reports false: this iterator never offers an alternative
// path for the current result.
func (it *Iterator) NextPath() bool {
	const morePaths = false
	return morePaths
}
// SubIterators returns nil; this iterator has no subiterators.
func (it *Iterator) SubIterators() []graph.Iterator {
	var none []graph.Iterator
	return none
}
// ResultTree wraps the current result in a new graph.ResultTree.
func (it *Iterator) ResultTree() *graph.ResultTree {
	current := it.Result()
	return graph.NewResultTree(current)
}
// Result returns the value most recently stored by Next or Contains, or nil
// if there is none.
func (it *Iterator) Result() graph.Value {
	current := it.result
	return current
}
// Next advances to the next live entity and reports whether one was found.
// The result is available through Result as a *Token of the iterator's kind.
//
// Keys are fetched from the datastore one page (bufferSize entities) at a
// time. Entities marked as deleted are skipped. Iteration continues across
// pages until the query is exhausted; a fetch error ends the iteration after
// any keys already buffered have been returned.
func (it *Iterator) Next() bool {
	// Serve from the current page while it still holds unread keys.
	if it.offset+1 < len(it.buffer) {
		it.offset++
		it.result = &Token{Kind: it.kind, Hash: it.buffer[it.offset]}
		return true
	}
	if it.done {
		return false
	}
	// Keep fetching until a page contains at least one live entry. A page
	// consisting only of deleted entries leaves the buffer empty without
	// exhausting the query, so the buffer must never be indexed blindly.
	for {
		ok := it.fetchPage()
		if len(it.buffer) > 0 {
			// First result
			it.result = &Token{Kind: it.kind, Hash: it.buffer[0]}
			return true
		}
		if !ok {
			return false
		}
		if it.done {
			// Protect against bad queries
			glog.Warningf("Query did not return any results")
			return false
		}
	}
}

// fetchPage replaces the buffer with the keys of the live entities in the
// next page of the query and advances the saved cursor. It sets it.done when
// no further page can be fetched and returns false if the fetch failed.
func (it *Iterator) fetchPage() bool {
	// Reset buffer and offset
	it.offset = 0
	it.buffer = make([]string, 0, bufferSize)

	// Create query
	// TODO (stefankoshiw) Keys only query?
	q := datastore.NewQuery(it.kind).Limit(bufferSize)
	if !it.isAll {
		// Filter on the direction {subject,objekt...}
		q = q.Filter(it.dir.String()+" =", it.name)
	}
	// Resume from the last cursor position
	if cursor, err := datastore.DecodeCursor(it.last); err == nil {
		q = q.Start(cursor)
	}

	// Buffer the keys of the next page of matches
	t := q.Run(it.qs.context)
	fetched := 0
	for {
		// Quirk of the datastore, you cannot pass a nil value to Next()
		// even if you just want the keys
		var (
			k       *datastore.Key
			err     error
			deleted bool
		)
		if it.kind == quadKind {
			temp := new(QuadEntry)
			k, err = t.Next(temp)
			// A quad is deleted when it has no more additions than deletions
			deleted = len(temp.Added) <= len(temp.Deleted)
		} else {
			temp := new(NodeEntry)
			k, err = t.Next(temp)
			// A node is deleted when nothing references it any more
			deleted = temp.Size == 0
		}
		if err == datastore.Done {
			break
		}
		if err != nil {
			glog.Errorf("Error fetching next entry %v", err)
			// The cursor cannot be advanced past a failed fetch, so a retry
			// would hand out the buffered keys again; stop here instead.
			it.done = true
			return false
		}
		fetched++
		if !deleted {
			it.buffer = append(it.buffer, k.StringID())
		}
	}

	// datastore.Done is also returned when the query limit is reached, so
	// only an empty or short page proves that the query is exhausted.
	if fetched == 0 || fetched < bufferSize {
		it.done = true
		return true
	}

	// Save cursor position for the next page
	cursor, err := t.Cursor()
	if err != nil {
		glog.Errorf("Error saving cursor position %v", err)
		// Without a cursor the same page would be fetched again.
		it.done = true
		return false
	}
	it.last = cursor.String()
	return true
}
// Size returns the size recorded at construction; the second value is always
// true.
func (it *Iterator) Size() (int64, bool) {
	const exact = true
	return it.size, exact
}
// gaedatastoreType is the iterator type returned by graph.RegisterIterator
// for the name "gaedatastore". It is assigned once, in init.
var gaedatastoreType graph.Type

// init registers the "gaedatastore" iterator type with the graph package.
func init() {
	gaedatastoreType = graph.RegisterIterator("gaedatastore")
}
// Type returns the iterator type registered by this package.
func Type() graph.Type {
	return gaedatastoreType
}
// Type returns the iterator type registered by this package; it is the same
// for every Iterator.
func (it *Iterator) Type() graph.Type {
	return gaedatastoreType
}
// Sorted always reports false.
func (it *Iterator) Sorted() bool {
	return false
}
// Optimize returns the iterator itself and false: no replacement iterator is
// ever produced.
func (it *Iterator) Optimize() (graph.Iterator, bool) {
	return it, false
}
// Describe returns a graph.Description of the iterator: its UID, a name of
// the form "<node name>/<hash>", its type, size, tags and direction.
func (it *Iterator) Describe() graph.Description {
	var desc graph.Description
	desc.UID = it.UID()
	desc.Name = fmt.Sprintf("%s/%s", it.name, it.hash)
	desc.Type = it.Type()
	desc.Size, _ = it.Size()
	desc.Tags = it.tags.Tags()
	desc.Direction = it.dir
	return desc
}
// Stats returns the iterator's statistics: fixed placeholder costs of 1 for
// Contains and 5 for Next, together with the iterator's size.
// TODO (stefankoshiw) calculate costs
func (it *Iterator) Stats() graph.IteratorStats {
	stats := graph.IteratorStats{
		ContainsCost: 1,
		NextCost:     5,
	}
	stats.Size, _ = it.Size()
	return stats
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,315 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gaedatastore
import (
"sort"
"testing"
"errors"
"github.com/barakmich/glog"
"github.com/google/cayley/graph"
"github.com/google/cayley/graph/iterator"
"github.com/google/cayley/quad"
"github.com/google/cayley/writer"
"reflect"
"appengine/aetest"
)
// This is a simple test graph.
//
//    +---+                        +---+
//    | A |-------               ->| F |<--
//    +---+       \------>+---+-/  +---+   \--+---+
//                 ------>|#B#|      |        | E |
//    +---+-------/      >+---+      |        +---+
//    | C |             /            v
//    +---+           -/           +---+
//      ----    +---+/             |#G#|
//          \-->|#D#|------------->+---+
//              +---+
//
var simpleGraph = []quad.Quad{
	// Unlabelled "follows" edges.
	{Subject: "A", Predicate: "follows", Object: "B"},
	{Subject: "C", Predicate: "follows", Object: "B"},
	{Subject: "C", Predicate: "follows", Object: "D"},
	{Subject: "D", Predicate: "follows", Object: "B"},
	{Subject: "B", Predicate: "follows", Object: "F"},
	{Subject: "F", Predicate: "follows", Object: "G"},
	{Subject: "D", Predicate: "follows", Object: "G"},
	{Subject: "E", Predicate: "follows", Object: "F"},
	// "status" edges, all carrying the "status_graph" label.
	{Subject: "B", Predicate: "status", Object: "cool", Label: "status_graph"},
	{Subject: "D", Predicate: "status", Object: "cool", Label: "status_graph"},
	{Subject: "G", Predicate: "status", Object: "cool", Label: "status_graph"},
}
// simpleGraphUpdate is a second batch of quads written on top of simpleGraph
// in TestAddRemove; two of its entries repeat quads already in simpleGraph.
var simpleGraphUpdate = []quad.Quad{
	{Subject: "A", Predicate: "follows", Object: "B"},
	{Subject: "F", Predicate: "follows", Object: "B"},
	{Subject: "C", Predicate: "follows", Object: "D"},
	{Subject: "X", Predicate: "follows", Object: "B"},
}
// pair associates a string with an int64. makeTestStore builds one pair per
// distinct non-empty quad component, numbered in order of first appearance.
type pair struct {
	// query is the quad component (subject, predicate, object or label).
	query string
	// value is the 1-based sequence number assigned to query.
	value int64
}
// makeTestStore creates a quad store bound to the request carried in opts,
// writes data into it through a single-replication writer, and returns the
// store, the writer and one pair per distinct non-empty quad component,
// numbered in order of first appearance.
//
// The signature has no error result, so setup failures are logged rather
// than silently discarded; the returned values are then whatever the failed
// step produced.
func makeTestStore(data []quad.Quad, opts graph.Options) (graph.QuadStore, graph.QuadWriter, []pair) {
	seen := make(map[string]struct{})

	qs, err := newQuadStore("", opts)
	if err != nil {
		glog.Errorf("newQuadStore failed: %v", err)
	}
	qs, reqErr := newQuadStoreForRequest(qs, opts)
	if reqErr != nil {
		glog.Errorf("newQuadStoreForRequest failed: %v", reqErr)
	}

	var (
		val int64
		ind []pair
	)
	// Named w so that the writer package is not shadowed.
	w, wErr := writer.NewSingleReplication(qs, nil)
	if wErr != nil {
		glog.Errorf("NewSingleReplication failed: %v", wErr)
	}
	for _, t := range data {
		for _, qp := range []string{t.Subject, t.Predicate, t.Object, t.Label} {
			if _, ok := seen[qp]; !ok && qp != "" {
				val++
				ind = append(ind, pair{qp, val})
				seen[qp] = struct{}{}
			}
		}
	}
	if addErr := w.AddQuadSet(data); addErr != nil {
		glog.Errorf("AddQuadSet failed: %v", addErr)
	}
	return qs, w, ind
}
// iterateResults drains it and returns its results as sorted strings: node
// tokens are rendered with qs.NameOf, everything else as the string form of
// qs.Quad. The iterator is reset before returning so it can be reused.
func iterateResults(qs graph.QuadStore, it graph.Iterator) []string {
	var names []string
	for graph.Next(it) {
		cur := it.Result()
		tok, isToken := cur.(*Token)
		if isToken && tok.Kind == nodeKind {
			names = append(names, qs.NameOf(cur))
			continue
		}
		names = append(names, qs.Quad(cur).String())
	}
	sort.Strings(names)
	it.Reset()
	return names
}
// printIterator drains it, logging the quad for each result at info level.
// The iterator is not reset afterwards.
func printIterator(qs graph.QuadStore, it graph.Iterator) {
	for graph.Next(it) {
		q := qs.Quad(it.Result())
		glog.Infof("%v", q)
	}
}
// compareResults drains the iterator twice (iterateResults resets it between
// passes) and checks each pass against expect, which is sorted in place.
// It returns (nil, true) when both passes match, otherwise the mismatching
// results and false.
func compareResults(qs graph.QuadStore, it graph.Iterator, expect []string) ([]string, bool) {
	sort.Strings(expect)
	for pass := 1; pass <= 2; pass++ {
		got := iterateResults(qs, it)
		sort.Strings(got)
		if reflect.DeepEqual(got, expect) {
			continue
		}
		return got, false
	}
	return nil, true
}
// createInstance starts an aetest instance and returns it together with
// graph options whose "HTTPRequest" entry holds a POST request to
// /api/v1/write created on that instance.
//
// On failure the returned instance and options are nil and the error carries
// the underlying cause. The caller owns the returned instance and must close
// it.
func createInstance() (aetest.Instance, graph.Options, error) {
	inst, err := aetest.NewInstance(&aetest.Options{"", true})
	if err != nil {
		return nil, nil, errors.New("Creation of new instance failed: " + err.Error())
	}
	req, err := inst.NewRequest("POST", "/api/v1/write", nil)
	if err != nil {
		// The instance is not handed to the caller on this path, so it has
		// to be shut down here or it would leak.
		inst.Close()
		return nil, nil, errors.New("Creation of new request failed: " + err.Error())
	}
	opts := make(graph.Options)
	opts["HTTPRequest"] = req
	return inst, opts, nil
}
// TestAddRemove loads simpleGraph, adds simpleGraphUpdate on top of it and
// then removes one quad, checking the quad count and the set of node names
// reported by the all-nodes iterator along the way.
func TestAddRemove(t *testing.T) {
	inst, opts, err := createInstance()
	if err != nil {
		t.Fatalf("failed to create instance: %v", err)
	}
	// Register the cleanup only once the instance is known to be valid;
	// createInstance returns a nil instance on failure.
	defer inst.Close()

	// Add quads
	qs, writer, _ := makeTestStore(simpleGraph, opts)
	if qs.Size() != 11 {
		t.Fatal("Incorrect number of quads")
	}
	all := qs.NodesAllIterator()
	expect := []string{
		"A",
		"B",
		"C",
		"D",
		"E",
		"F",
		"G",
		"follows",
		"status",
		"cool",
		"status_graph",
	}
	if got, ok := compareResults(qs, all, expect); !ok {
		t.Errorf("Unexpected iterated result, got:%v expect:%v", got, expect)
	}

	// Add more quads, some conflicts
	if err := writer.AddQuadSet(simpleGraphUpdate); err != nil {
		t.Errorf("AddQuadSet failed, %v", err)
	}
	if qs.Size() != 13 {
		t.Fatal("Incorrect number of quads")
	}
	all = qs.NodesAllIterator()
	expect = []string{
		"A",
		"B",
		"C",
		"D",
		"E",
		"F",
		"G",
		"X",
		"follows",
		"status",
		"cool",
		"status_graph",
	}
	if got, ok := compareResults(qs, all, expect); !ok {
		t.Errorf("Unexpected iterated result, got:%v expect:%v", got, expect)
	}

	// Remove quad
	toRemove := quad.Quad{"X", "follows", "B", ""}
	err = writer.RemoveQuad(toRemove)
	if err != nil {
		t.Errorf("RemoveQuad failed: %v", err)
	}
	expect = []string{
		"A",
		"B",
		"C",
		"D",
		"E",
		"F",
		"G",
		"follows",
		"status",
		"cool",
		"status_graph",
	}
	// The iterator was reset by the previous comparison, so it is reused.
	if got, ok := compareResults(qs, all, expect); !ok {
		t.Errorf("Unexpected iterated result, got:%v expect:%v", got, expect)
	}
}
// TestIterators checks that a subject iterator for "C" yields exactly C's
// two quads, and that a label iterator for "status_graph" reports a quad
// with that label as contained.
func TestIterators(t *testing.T) {
	glog.Info("\n-----------\n")
	inst, opts, err := createInstance()
	if err != nil {
		t.Fatalf("failed to create instance: %v", err)
	}
	// Register the cleanup only once the instance is known to be valid;
	// createInstance returns a nil instance on failure.
	defer inst.Close()

	qs, _, _ := makeTestStore(simpleGraph, opts)
	if qs.Size() != 11 {
		t.Fatal("Incorrect number of quads")
	}

	var expected = []string{
		quad.Quad{"C", "follows", "B", ""}.String(),
		quad.Quad{"C", "follows", "D", ""}.String(),
	}
	it := qs.QuadIterator(quad.Subject, qs.ValueOf("C"))
	if got, ok := compareResults(qs, it, expected); !ok {
		t.Errorf("Unexpected iterated result, got:%v expect:%v", got, expected)
	}

	// Test contains
	it = qs.QuadIterator(quad.Label, qs.ValueOf("status_graph"))
	gqs := qs.(*QuadStore)
	key := gqs.createKeyForQuad(quad.Quad{"G", "status", "cool", "status_graph"})
	token := &Token{quadKind, key.StringID()}
	if !it.Contains(token) {
		t.Error("Contains failed")
	}
}
// TestIteratorsAndNextResultOrderA builds And(fixed "C", HasA(And(LinksTo
// "follows" as predicate, LinksTo all nodes as object), subject)) and checks
// that it yields "C" exactly once, with "B" and "D" reachable as the
// all-nodes iterator's results across NextPath.
func TestIteratorsAndNextResultOrderA(t *testing.T) {
	glog.Info("\n-----------\n")
	inst, opts, err := createInstance()
	if err != nil {
		t.Fatalf("failed to create instance: %v", err)
	}
	// Register the cleanup only once the instance is known to be valid;
	// createInstance returns a nil instance on failure.
	defer inst.Close()

	qs, _, _ := makeTestStore(simpleGraph, opts)
	if qs.Size() != 11 {
		t.Fatal("Incorrect number of quads")
	}

	fixed := qs.FixedIterator()
	fixed.Add(qs.ValueOf("C"))

	fixed2 := qs.FixedIterator()
	fixed2.Add(qs.ValueOf("follows"))

	all := qs.NodesAllIterator()

	innerAnd := iterator.NewAnd()
	innerAnd.AddSubIterator(iterator.NewLinksTo(qs, fixed2, quad.Predicate))
	innerAnd.AddSubIterator(iterator.NewLinksTo(qs, all, quad.Object))

	hasa := iterator.NewHasA(qs, innerAnd, quad.Subject)
	outerAnd := iterator.NewAnd()
	outerAnd.AddSubIterator(fixed)
	outerAnd.AddSubIterator(hasa)

	if !outerAnd.Next() {
		t.Error("Expected one matching subtree")
	}
	val := outerAnd.Result()
	if qs.NameOf(val) != "C" {
		// Report the value that is actually expected.
		t.Errorf("Matching subtree should be %s, got %s", "C", qs.NameOf(val))
	}

	var (
		got    []string
		expect = []string{"B", "D"}
	)
	for {
		got = append(got, qs.NameOf(all.Result()))
		if !outerAnd.NextPath() {
			break
		}
	}
	sort.Strings(got)

	if !reflect.DeepEqual(got, expect) {
		t.Errorf("Unexpected result, got:%q expect:%q", got, expect)
	}

	if outerAnd.Next() {
		t.Error("More than one possible top level output?")
	}
}