From e0df752618510153d2a304b7c93424557cbfc441 Mon Sep 17 00:00:00 2001 From: kortschak Date: Thu, 26 Jun 2014 08:38:15 +0930 Subject: [PATCH] Reorganize to go get will work This makes almost no changes to source, but touches every almost file. Also fixes error in gremlin test code. --- Makefile | 29 -- activate.sh | 9 - appengine/cayley-appengine.go | 20 +- config/config.go | 113 ++++++ graph/all-iterator.go | 117 ++++++ graph/and-iterator-optimize.go | 330 ++++++++++++++++ graph/and-iterator-optimize_test.go | 111 ++++++ graph/and-iterator.go | 248 ++++++++++++ graph/and-iterator_test.go | 147 +++++++ graph/fixed-iterator.go | 157 ++++++++ graph/hasa-iterator.go | 224 +++++++++++ graph/iterator.go | 304 +++++++++++++++ graph/linksto-iterator.go | 184 +++++++++ graph/linksto-iterator_test.go | 37 ++ graph/memstore/llrb-iterator.go | 119 ++++++ graph/memstore/memstore-all-iterator.go | 45 +++ graph/memstore/memtriplestore-iterator-optimize.go | 53 +++ graph/memstore/memtriplestore.go | 268 +++++++++++++ graph/memstore/memtriplestore_test.go | 138 +++++++ graph/memstore/testing_memstore.go | 45 +++ graph/mock_ts.go | 58 +++ graph/mongo/lru.go | 62 +++ graph/mongo/mongo-iterator.go | 181 +++++++++ graph/mongo/mongo-triplestore-iterator-optimize.go | 53 +++ graph/mongo/mongo-triplestore.go | 329 ++++++++++++++++ graph/optional-iterator.go | 134 +++++++ graph/or-iterator.go | 287 ++++++++++++++ graph/or-iterator_test.go | 142 +++++++ graph/query-shape.go | 189 +++++++++ graph/query-shape_test.go | 124 ++++++ graph/result-tree-evaluator.go | 70 ++++ graph/result-tree-evaluator_test.go | 42 ++ graph/session.go | 45 +++ graph/sexp/parser.go | 271 +++++++++++++ graph/sexp/parser_test.go | 129 ++++++ graph/sexp/sexp-session.go | 121 ++++++ graph/triple.go | 109 ++++++ graph/triplestore.go | 119 ++++++ graph/value-comparison-iterator.go | 193 +++++++++ graph/value-comparison-iterator_test.go | 126 ++++++ gremlin/gremlin-build-iterator.go | 315 +++++++++++++++ 
gremlin/gremlin-env.go | 95 +++++ gremlin/gremlin-finals.go | 274 +++++++++++++ gremlin/gremlin-functional_test.go | 230 +++++++++++ gremlin/gremlin-session.go | 266 +++++++++++++ gremlin/gremlin-traversals.go | 184 +++++++++ gremlin/gremlin_test.nt | 11 + http/cayley-http-docs.go | 73 ++++ http/cayley-http-query.go | 153 ++++++++ http/cayley-http-write.go | 119 ++++++ http/cayley-http.go | 113 ++++++ http/cayley-http_test.go | 53 +++ init.go | 40 ++ load.go | 81 ++++ make.sh | 65 ---- mql/mql-build-iterator.go | 181 +++++++++ mql/mql-fill.go | 114 ++++++ mql/mql-functional_test.go | 264 +++++++++++++ mql/mql-query.go | 111 ++++++ mql/mql-session.go | 144 +++++++ nquads/nquads.go | 196 ++++++++++ nquads/nquads_test.go | 131 +++++++ open.go | 40 ++ repl.go | 143 +++++++ src/cayley/main.go | 87 ----- src/cayley_cmd/cayley-init.go | 40 -- src/cayley_cmd/cayley-load.go | 79 ---- src/cayley_cmd/cayley-open.go | 40 -- src/cayley_cmd/cayley-repl.go | 142 ------- src/cayley_config/cayley-config.go | 113 ------ src/cayley_http/cayley-http-docs.go | 72 ---- src/cayley_http/cayley-http-query.go | 151 ------- src/cayley_http/cayley-http-write.go | 117 ------ src/cayley_http/cayley-http.go | 111 ------ src/cayley_http/cayley-http_test.go | 53 --- src/graph/all-iterator.go | 117 ------ src/graph/and-iterator-optimize.go | 330 ---------------- src/graph/and-iterator-optimize_test.go | 111 ------ src/graph/and-iterator.go | 248 ------------ src/graph/and-iterator_test.go | 147 ------- src/graph/fixed-iterator.go | 157 -------- src/graph/hasa-iterator.go | 224 ----------- src/graph/iterator.go | 304 --------------- src/graph/linksto-iterator.go | 184 --------- src/graph/linksto-iterator_test.go | 37 -- src/graph/mock_ts.go | 58 --- src/graph/optional-iterator.go | 134 ------- src/graph/or-iterator.go | 287 -------------- src/graph/or-iterator_test.go | 142 ------- src/graph/query-shape.go | 189 --------- src/graph/query-shape_test.go | 124 ------ src/graph/result-tree-evaluator.go 
| 70 ---- src/graph/result-tree-evaluator_test.go | 42 -- src/graph/session.go | 45 --- src/graph/triple.go | 109 ------ src/graph/triplestore.go | 119 ------ src/graph/value-comparison-iterator.go | 193 --------- src/graph/value-comparison-iterator_test.go | 126 ------ src/graph_leveldb/leveldb-all-iterator.go | 132 ------- src/graph_leveldb/leveldb-iterator.go | 210 ---------- .../leveldb-triplestore-iterator-optimize.go | 53 --- src/graph_leveldb/leveldb-triplestore.go | 427 -------------------- src/graph_leveldb/leveldb_test.go | 433 --------------------- src/graph_memstore/llrb-iterator.go | 117 ------ src/graph_memstore/memstore-all-iterator.go | 45 --- .../memtriplestore-iterator-optimize.go | 53 --- src/graph_memstore/memtriplestore.go | 266 ------------- src/graph_memstore/memtriplestore_test.go | 136 ------- src/graph_memstore/testing_memstore.go | 45 --- src/graph_mongo/lru.go | 62 --- src/graph_mongo/mongo-iterator.go | 179 --------- .../mongo-triplestore-iterator-optimize.go | 53 --- src/graph_mongo/mongo-triplestore.go | 327 ---------------- src/graph_sexp/parser.go | 270 ------------- src/graph_sexp/parser_test.go | 127 ------ src/graph_sexp/sexp-session.go | 120 ------ src/gremlin/gremlin-build-iterator.go | 313 --------------- src/gremlin/gremlin-env.go | 95 ----- src/gremlin/gremlin-finals.go | 273 ------------- src/gremlin/gremlin-functional_test.go | 228 ----------- src/gremlin/gremlin-session.go | 264 ------------- src/gremlin/gremlin-traversals.go | 184 --------- src/gremlin/gremlin_test.nt | 11 - src/mql/mql-build-iterator.go | 180 --------- src/mql/mql-fill.go | 113 ------ src/mql/mql-functional_test.go | 262 ------------- src/mql/mql-query.go | 110 ------ src/mql/mql-session.go | 142 ------- src/nquads/nquads.go | 194 --------- src/nquads/nquads_test.go | 129 ------ 130 files changed, 8766 insertions(+), 10167 deletions(-) delete mode 100644 Makefile delete mode 100644 activate.sh create mode 100644 config/config.go create mode 100644 
graph/all-iterator.go create mode 100644 graph/and-iterator-optimize.go create mode 100644 graph/and-iterator-optimize_test.go create mode 100644 graph/and-iterator.go create mode 100644 graph/and-iterator_test.go create mode 100644 graph/fixed-iterator.go create mode 100644 graph/hasa-iterator.go create mode 100644 graph/iterator.go create mode 100644 graph/linksto-iterator.go create mode 100644 graph/linksto-iterator_test.go create mode 100644 graph/memstore/llrb-iterator.go create mode 100644 graph/memstore/memstore-all-iterator.go create mode 100644 graph/memstore/memtriplestore-iterator-optimize.go create mode 100644 graph/memstore/memtriplestore.go create mode 100644 graph/memstore/memtriplestore_test.go create mode 100644 graph/memstore/testing_memstore.go create mode 100644 graph/mock_ts.go create mode 100644 graph/mongo/lru.go create mode 100644 graph/mongo/mongo-iterator.go create mode 100644 graph/mongo/mongo-triplestore-iterator-optimize.go create mode 100644 graph/mongo/mongo-triplestore.go create mode 100644 graph/optional-iterator.go create mode 100644 graph/or-iterator.go create mode 100644 graph/or-iterator_test.go create mode 100644 graph/query-shape.go create mode 100644 graph/query-shape_test.go create mode 100644 graph/result-tree-evaluator.go create mode 100644 graph/result-tree-evaluator_test.go create mode 100644 graph/session.go create mode 100644 graph/sexp/parser.go create mode 100644 graph/sexp/parser_test.go create mode 100644 graph/sexp/sexp-session.go create mode 100644 graph/triple.go create mode 100644 graph/triplestore.go create mode 100644 graph/value-comparison-iterator.go create mode 100644 graph/value-comparison-iterator_test.go create mode 100644 gremlin/gremlin-build-iterator.go create mode 100644 gremlin/gremlin-env.go create mode 100644 gremlin/gremlin-finals.go create mode 100644 gremlin/gremlin-functional_test.go create mode 100644 gremlin/gremlin-session.go create mode 100644 gremlin/gremlin-traversals.go create mode 
100644 gremlin/gremlin_test.nt create mode 100644 http/cayley-http-docs.go create mode 100644 http/cayley-http-query.go create mode 100644 http/cayley-http-write.go create mode 100644 http/cayley-http.go create mode 100644 http/cayley-http_test.go create mode 100644 init.go create mode 100644 load.go delete mode 100755 make.sh create mode 100644 mql/mql-build-iterator.go create mode 100644 mql/mql-fill.go create mode 100644 mql/mql-functional_test.go create mode 100644 mql/mql-query.go create mode 100644 mql/mql-session.go create mode 100644 nquads/nquads.go create mode 100644 nquads/nquads_test.go create mode 100644 open.go create mode 100644 repl.go delete mode 100644 src/cayley/main.go delete mode 100644 src/cayley_cmd/cayley-init.go delete mode 100644 src/cayley_cmd/cayley-load.go delete mode 100644 src/cayley_cmd/cayley-open.go delete mode 100644 src/cayley_cmd/cayley-repl.go delete mode 100644 src/cayley_config/cayley-config.go delete mode 100644 src/cayley_http/cayley-http-docs.go delete mode 100644 src/cayley_http/cayley-http-query.go delete mode 100644 src/cayley_http/cayley-http-write.go delete mode 100644 src/cayley_http/cayley-http.go delete mode 100644 src/cayley_http/cayley-http_test.go delete mode 100644 src/graph/all-iterator.go delete mode 100644 src/graph/and-iterator-optimize.go delete mode 100644 src/graph/and-iterator-optimize_test.go delete mode 100644 src/graph/and-iterator.go delete mode 100644 src/graph/and-iterator_test.go delete mode 100644 src/graph/fixed-iterator.go delete mode 100644 src/graph/hasa-iterator.go delete mode 100644 src/graph/iterator.go delete mode 100644 src/graph/linksto-iterator.go delete mode 100644 src/graph/linksto-iterator_test.go delete mode 100644 src/graph/mock_ts.go delete mode 100644 src/graph/optional-iterator.go delete mode 100644 src/graph/or-iterator.go delete mode 100644 src/graph/or-iterator_test.go delete mode 100644 src/graph/query-shape.go delete mode 100644 src/graph/query-shape_test.go delete mode 
100644 src/graph/result-tree-evaluator.go delete mode 100644 src/graph/result-tree-evaluator_test.go delete mode 100644 src/graph/session.go delete mode 100644 src/graph/triple.go delete mode 100644 src/graph/triplestore.go delete mode 100644 src/graph/value-comparison-iterator.go delete mode 100644 src/graph/value-comparison-iterator_test.go delete mode 100644 src/graph_leveldb/leveldb-all-iterator.go delete mode 100644 src/graph_leveldb/leveldb-iterator.go delete mode 100644 src/graph_leveldb/leveldb-triplestore-iterator-optimize.go delete mode 100644 src/graph_leveldb/leveldb-triplestore.go delete mode 100644 src/graph_leveldb/leveldb_test.go delete mode 100644 src/graph_memstore/llrb-iterator.go delete mode 100644 src/graph_memstore/memstore-all-iterator.go delete mode 100644 src/graph_memstore/memtriplestore-iterator-optimize.go delete mode 100644 src/graph_memstore/memtriplestore.go delete mode 100644 src/graph_memstore/memtriplestore_test.go delete mode 100644 src/graph_memstore/testing_memstore.go delete mode 100644 src/graph_mongo/lru.go delete mode 100644 src/graph_mongo/mongo-iterator.go delete mode 100644 src/graph_mongo/mongo-triplestore-iterator-optimize.go delete mode 100644 src/graph_mongo/mongo-triplestore.go delete mode 100644 src/graph_sexp/parser.go delete mode 100644 src/graph_sexp/parser_test.go delete mode 100644 src/graph_sexp/sexp-session.go delete mode 100644 src/gremlin/gremlin-build-iterator.go delete mode 100644 src/gremlin/gremlin-env.go delete mode 100644 src/gremlin/gremlin-finals.go delete mode 100644 src/gremlin/gremlin-functional_test.go delete mode 100644 src/gremlin/gremlin-session.go delete mode 100644 src/gremlin/gremlin-traversals.go delete mode 100644 src/gremlin/gremlin_test.nt delete mode 100644 src/mql/mql-build-iterator.go delete mode 100644 src/mql/mql-fill.go delete mode 100644 src/mql/mql-functional_test.go delete mode 100644 src/mql/mql-query.go delete mode 100644 src/mql/mql-session.go delete mode 100644 
src/nquads/nquads.go delete mode 100644 src/nquads/nquads_test.go diff --git a/Makefile b/Makefile deleted file mode 100644 index 3a28444..0000000 --- a/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2014 The Cayley Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -default: build - -build: - ./make.sh build - -deps: - ./make.sh deps - -test: - ls ./src | grep -v "\." | sed 's/\///g' | xargs go test -cover - -convey: - ./bin/goconvey --depth=2 - - diff --git a/activate.sh b/activate.sh deleted file mode 100644 index 6d8cabd..0000000 --- a/activate.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Absolute path this script is in. 
/home/user/bin -cd "`dirname '${BASH_SOURCE:-$0}'`" -SCRIPTPATH="`pwd`" -echo $dir -cd - > /dev/null - -export GOPATH=$SCRIPTPATH -#export GOOS="linux" -#export GOARCH="amd64" diff --git a/appengine/cayley-appengine.go b/appengine/cayley-appengine.go index 77a0e23..c915932 100644 --- a/appengine/cayley-appengine.go +++ b/appengine/cayley-appengine.go @@ -15,22 +15,24 @@ package cayleyappengine import ( - "cayley_config" - "cayley_http" - "github.com/barakmich/glog" - "graph" - "graph_memstore" - "nquads" "os" + + "github.com/barakmich/glog" + + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/memstore" + "github.com/google/cayley/http" + "github.com/google/cayley/nquads" ) func init() { glog.SetToStderr(true) - config := cayley_config.ParseConfigFromFile("cayley_appengine.cfg") - ts := graph_memstore.NewMemTripleStore() + config := cfg.ParseConfigFromFile("cayley_appengine.cfg") + ts := memstore.NewMemTripleStore() glog.Errorln(config) LoadTriplesFromFileInto(ts, config.DatabasePath, config.LoadSize) - cayley_http.SetupRoutes(ts, config) + http.SetupRoutes(ts, config) } func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) { diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..8e19d6c --- /dev/null +++ b/config/config.go @@ -0,0 +1,113 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cayley_config + +import ( + "encoding/json" + "flag" + "github.com/barakmich/glog" + "os" +) + +type CayleyConfig struct { + DatabaseType string `json:"database"` + DatabasePath string `json:"db_path"` + DatabaseOptions map[string]interface{} `json:"db_options"` + ListenHost string `json:"listen_host"` + ListenPort string `json:"listen_port"` + ReadOnly bool `json:"read_only"` + GremlinTimeout int `json:"gremlin_timeout"` + LoadSize int `json:"load_size"` +} + +var databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") +var databaseBackend = flag.String("db", "mem", "Database Backend.") +var host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).") +var loadSize = flag.Int("load_size", 10000, "Size of triplesets to load") +var port = flag.String("port", "64210", "Port to listen on.") +var readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.") +var gremlinTimeout = flag.Int("gremlin_timeout", 30, "Number of seconds until an individual query times out.") + +func ParseConfigFromFile(filename string) *CayleyConfig { + config := &CayleyConfig{} + if filename == "" { + return config + } + f, err := os.Open(filename) + if err != nil { + glog.Fatalln("Couldn't open config file", filename) + } + + defer f.Close() + + dec := json.NewDecoder(f) + err = dec.Decode(config) + if err != nil { + glog.Fatalln("Couldn't read config file:", err) + } + return config +} + +func ParseConfigFromFlagsAndFile(fileFlag string) *CayleyConfig { + // Find the file... 
+ var trueFilename string + if fileFlag != "" { + if _, err := os.Stat(fileFlag); os.IsNotExist(err) { + glog.Fatalln("Cannot find specified configuration file", fileFlag, ", aborting.") + } else { + trueFilename = fileFlag + } + } else { + if _, err := os.Stat(os.Getenv("CAYLEY_CFG")); err == nil { + trueFilename = os.Getenv("CAYLEY_CFG") + } else { + if _, err := os.Stat("/etc/cayley.cfg"); err == nil { + trueFilename = "/etc/cayley.cfg" + } + } + } + if trueFilename == "" { + glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.") + } + config := ParseConfigFromFile(trueFilename) + + if config.DatabasePath == "" { + config.DatabasePath = *databasePath + } + + if config.DatabaseType == "" { + config.DatabaseType = *databaseBackend + } + + if config.ListenHost == "" { + config.ListenHost = *host + } + + if config.ListenPort == "" { + config.ListenPort = *port + } + + if config.GremlinTimeout == 0 { + config.GremlinTimeout = *gremlinTimeout + } + + if config.LoadSize == 0 { + config.LoadSize = *loadSize + } + + config.ReadOnly = config.ReadOnly || *readOnly + + return config +} diff --git a/graph/all-iterator.go b/graph/all-iterator.go new file mode 100644 index 0000000..6068746 --- /dev/null +++ b/graph/all-iterator.go @@ -0,0 +1,117 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package graph + +// Defines one of the base iterators, the All iterator. Which, logically +// enough, represents all nodes or all links in the graph. +// +// This particular file is actually vestigal. It's up to the TripleStore to give +// us an All iterator that represents all things in the graph. So this is +// really the All iterator for the MemTripleStore. That said, it *is* one of +// the base iterators, and it helps just to see it here. + +import ( + "fmt" + "strings" +) + +// An All iterator across a range of int64 values, from `max` to `min`. +type Int64AllIterator struct { + BaseIterator + max, min int64 + at int64 +} + +// Creates a new Int64AllIterator with the given range. +func NewInt64AllIterator(min, max int64) *Int64AllIterator { + var all Int64AllIterator + BaseIteratorInit(&all.BaseIterator) + all.max = max + all.min = min + all.at = min + return &all +} + +// Start back at the beginning +func (a *Int64AllIterator) Reset() { + a.at = a.min +} + +func (a *Int64AllIterator) Close() { +} + +func (a *Int64AllIterator) Clone() Iterator { + out := NewInt64AllIterator(a.min, a.max) + out.CopyTagsFrom(a) + return out +} + +// Prints the All iterator as just an "all". +func (a *Int64AllIterator) DebugString(indent int) string { + return fmt.Sprintf("%s(%s)", strings.Repeat(" ", indent), a.Type()) +} + +// Next() on an Int64 all iterator is a simple incrementing counter. +// Return the next integer, and mark it as the result. +func (a *Int64AllIterator) Next() (TSVal, bool) { + NextLogIn(a) + if a.at == -1 { + return NextLogOut(a, nil, false) + } + val := a.at + a.at = a.at + 1 + if a.at > a.max { + a.at = -1 + } + a.Last = val + return NextLogOut(a, val, true) +} + +// The number of elements in an Int64AllIterator is the size of the range. +// The size is exact. 
+func (a *Int64AllIterator) Size() (int64, bool) { + Size := ((a.max - a.min) + 1) + return Size, true +} + +// Check() for an Int64AllIterator is merely seeing if the passed value is +// withing the range, assuming the value is an int64. +func (a *Int64AllIterator) Check(tsv TSVal) bool { + CheckLogIn(a, tsv) + v := tsv.(int64) + if a.min <= v && v <= a.max { + a.Last = v + return CheckLogOut(a, v, true) + } + return CheckLogOut(a, v, false) +} + +// The type of this iterator is an "all". This is important, as it puts it in +// the class of "all iterators. +func (a *Int64AllIterator) Type() string { return "all" } + +// There's nothing to optimize about this little iterator. +func (a *Int64AllIterator) Optimize() (Iterator, bool) { return a, false } + +// Stats for an Int64AllIterator are simple. Super cheap to do any operation, +// and as big as the range. +func (a *Int64AllIterator) GetStats() *IteratorStats { + s, _ := a.Size() + return &IteratorStats{ + CheckCost: 1, + NextCost: 1, + Size: s, + } +} diff --git a/graph/and-iterator-optimize.go b/graph/and-iterator-optimize.go new file mode 100644 index 0000000..950d681 --- /dev/null +++ b/graph/and-iterator-optimize.go @@ -0,0 +1,330 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Perhaps the most tricky file in this entire module. Really a method on the +// AndIterator, but important enough to deserve its own file. 
+// +// Calling Optimize() on an And iterator, like any iterator, requires that we +// preserve the underlying meaning. However, the And has many choices, namely, +// which one of it's subiterators will be the branch that does the Next()ing, +// and which ordering of the remaining iterators is the most efficient. In +// short, this is where a lot of the query optimization happens, and there are +// many wins to be had here, as well as many bad bugs. The worst class of bug +// changes the meaning of the query. The second worst class makes things really +// slow. +// +// The good news is this: If Optimize() is never called (turned off, perhaps) we can +// be sure the results are as good as the query language called for. +// +// In short, tread lightly. + +import ( + "container/list" +) + +// Optimizes the AndIterator, by picking the most efficient way to Next() and +// Check() its subiterators. For SQL fans, this is equivalent to JOIN. +func (and *AndIterator) Optimize() (Iterator, bool) { + // First, let's get the list of iterators, in order (first one is Next()ed, + // the rest are Check()ed) + oldItList := and.GetSubIterators() + + // And call Optimize() on our subtree, replacing each one in the order we + // found them. it_list is the newly optimized versions of these, and changed + // is another list, of only the ones that have returned replacements and + // changed. + itList := optimizeSubIterators(oldItList) + + // Close the replaced iterators (they ought to close themselves, but Close() + // is idempotent, so this just protects against any machinations). + closeIteratorList(oldItList, nil) + + // If we can find only one subiterator which is equivalent to this whole and, + // we can replace the And... + out := and.optimizeReplacement(itList) + if out != nil { + // ...Move the tags to the replacement... + moveTagsTo(out, and) + // ...Close everyone except `out`, our replacement... + closeIteratorList(itList, out) + // ...And return it. 
+ return out, true + } + + // And now, without changing any of the iterators, we reorder them. it_list is + // now a permutation of itself, but the contents are unchanged. + itList = optimizeOrder(itList) + + // Okay! At this point we have an optimized order. + + // The easiest thing to do at this point is merely to create a new And iterator + // and replace ourselves with our (reordered, optimized) clone. + newAnd := NewAndIterator() + + // Add the subiterators in order. + for e := itList.Front(); e != nil; e = e.Next() { + newAnd.AddSubIterator(e.Value.(Iterator)) + } + + // Move the tags hanging on us (like any good replacement). + newAnd.CopyTagsFrom(and) + + newAnd.optimizeCheck() + + // And close ourselves but not our subiterators -- some may still be alive in + // the new And (they were unchanged upon calling Optimize() on them, at the + // start). + and.cleanUp() + return newAnd, true +} + +// Closes a list of iterators, except the one passed in `except`. Closes all +// of the iterators in the list if `except` is nil. +func closeIteratorList(l *list.List, except Iterator) { + for e := l.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + if it != except { + e.Value.(Iterator).Close() + } + } +} + +// Find if there is a single subiterator which is a valid replacement for this +// AndIterator. +func (and *AndIterator) optimizeReplacement(itList *list.List) Iterator { + // If we were created with no SubIterators, we're as good as Null. + if itList.Len() == 0 { + return &NullIterator{} + } + if itList.Len() == 1 { + // When there's only one iterator, there's only one choice. + return itList.Front().Value.(Iterator) + } + // If any of our subiterators, post-optimization, are also Null, then + // there's no point in continuing the branch, we will have no results + // and we are null as well. + if hasAnyNullIterators(itList) { + return &NullIterator{} + } + + // If we have one useful iterator, use that. 
+ it := hasOneUsefulIterator(itList) + if it != nil { + return it + } + return nil +} + +// optimizeOrder(l) takes a list and returns a list, containing the same contents +// but with a new ordering, however it wishes. +func optimizeOrder(l *list.List) *list.List { + out := list.New() + var bestIt Iterator + bestCost := int64(1 << 62) + // bad contains iterators that can't be (efficiently) nexted, such as + // "optional" or "not". Separate them out and tack them on at the end. + bad := list.New() + + // Find the iterator with the projected "best" total cost. + // Total cost is defined as The Next()ed iterator's cost to Next() out + // all of it's contents, and to Check() each of those against everyone + // else. + for e := l.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + if !it.Nextable() { + bad.PushBack(it) + continue + } + rootStats := e.Value.(Iterator).GetStats() + projectedCost := rootStats.NextCost + for f := l.Front(); f != nil; f = f.Next() { + if !f.Value.(Iterator).Nextable() { + continue + } + if f == e { + continue + } + stats := f.Value.(Iterator).GetStats() + projectedCost += stats.CheckCost + } + projectedCost = projectedCost * rootStats.Size + if projectedCost < bestCost { + bestIt = it + bestCost = projectedCost + } + } + + // TODO(barakmich): Optimization of order need not stop here. Picking a smart + // Check() order based on probability of getting a false Check() first is + // useful (fail faster). + + // Put the best iterator (the one we wish to Next()) at the front... + out.PushBack(bestIt) + // ...And push everyone else after... + for e := l.Front(); e != nil; e = e.Next() { + thisIt := e.Value.(Iterator) + if !thisIt.Nextable() { + continue + } + if thisIt != bestIt { + out.PushBack(thisIt) + } + } + // ...And finally, the difficult children on the end. + out.PushBackList(bad) + return out +} + +// optimizeCheck(l) creates an alternate check list, containing the same contents +// but with a new ordering, however it wishes. 
+func (and *AndIterator) optimizeCheck() { + subIts := and.GetSubIterators() + out := list.New() + + // Find the iterator with the lowest Check() cost, push it to the front, repeat. + for subIts.Len() != 0 { + var best *list.Element + bestCost := int64(1 << 62) + for e := subIts.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + rootStats := it.GetStats() + projectedCost := rootStats.CheckCost + if projectedCost < bestCost { + best = e + bestCost = projectedCost + } + } + out.PushBack(best.Value) + subIts.Remove(best) + } + + and.checkList = out +} + +// If we're replacing ourselves by a single iterator, we need to grab the +// result tags from the iterators that, while still valid and would hold +// the same values as this and, are not going to stay. +// getSubTags() returns a map of the tags for all the subiterators. +func (and *AndIterator) getSubTags() map[string]bool { + subs := and.GetSubIterators() + tags := make(map[string]bool) + for e := subs.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + for _, tag := range it.Tags() { + tags[tag] = true + } + } + for _, tag := range and.Tags() { + tags[tag] = true + } + return tags +} + +// moveTagsTo() gets the tags for all of the And's subiterators and the +// And itself, and moves them to `out`. +func moveTagsTo(out Iterator, and *AndIterator) { + tagmap := and.getSubTags() + for _, tag := range out.Tags() { + if tagmap[tag] { + delete(tagmap, tag) + } + } + for k, _ := range tagmap { + out.AddTag(k) + } +} + +// optimizeSubIterators(l) takes a list of iterators and calls Optimize() on all +// of them. It returns two lists -- the first contains the same list as l, where +// any replacements are made by Optimize() and the second contains the originals +// which were replaced. 
+func optimizeSubIterators(l *list.List) *list.List { + itList := list.New() + for e := l.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + newIt, change := it.Optimize() + if change { + itList.PushBack(newIt) + } else { + itList.PushBack(it.Clone()) + } + } + return itList +} + +// Check a list of iterators for any Null iterators. +func hasAnyNullIterators(l *list.List) bool { + for e := l.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + if it.Type() == "null" { + return true + } + } + return false +} + +// There are two "not-useful" iterators -- namely "null" which returns +// nothing, and "all" which returns everything. Particularly, we want +// to see if we're intersecting with a bunch of "all" iterators, and, +// if we are, then we have only one useful iterator. +func hasOneUsefulIterator(l *list.List) Iterator { + usefulCount := 0 + var usefulIt Iterator + for e := l.Front(); e != nil; e = e.Next() { + it := e.Value.(Iterator) + switch it.Type() { + case "null", "all": + continue + case "optional": + // Optional is weird -- it's not useful, but we can't optimize + // away from it. Therefore, we skip this optimization + // if we see one. + return nil + default: + usefulCount++ + usefulIt = it + } + } + + if usefulCount == 1 { + return usefulIt + } + return nil +} + +// and.GetStats() lives here in and-iterator-optimize.go because it may +// in the future return different statistics based on how it is optimized. +// For now, however, it's pretty static. 
+func (and *AndIterator) GetStats() *IteratorStats { + primaryStats := and.primaryIt.GetStats() + CheckCost := primaryStats.CheckCost + NextCost := primaryStats.NextCost + Size := primaryStats.Size + for _, it := range and.internalIterators { + stats := it.GetStats() + NextCost += stats.CheckCost + CheckCost += stats.CheckCost + if Size > stats.Size { + Size = stats.Size + } + } + return &IteratorStats{ + CheckCost: CheckCost, + NextCost: NextCost, + Size: Size, + } + +} diff --git a/graph/and-iterator-optimize_test.go b/graph/and-iterator-optimize_test.go new file mode 100644 index 0000000..4ecee81 --- /dev/null +++ b/graph/and-iterator-optimize_test.go @@ -0,0 +1,111 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Tests relating to methods in and-iterator-optimize. Many are pretty simplistic, but +// nonetheless cover a lot of basic cases. 
+ +import ( + "reflect" + "sort" + "testing" +) + +func TestIteratorPromotion(t *testing.T) { + all := NewInt64AllIterator(1, 3) + fixed := newFixedIterator() + fixed.AddValue(3) + a := NewAndIterator() + a.AddSubIterator(all) + a.AddSubIterator(fixed) + all.AddTag("a") + fixed.AddTag("b") + a.AddTag("c") + newIt, changed := a.Optimize() + if !changed { + t.Error("Iterator didn't optimize") + } + if newIt.Type() != "fixed" { + t.Error("Expected fixed iterator") + } + tagsExpected := []string{"a", "b", "c"} + tags := newIt.Tags() + sort.Strings(tags) + if !reflect.DeepEqual(tags, tagsExpected) { + t.Fatal("Tags don't match") + } +} + +func TestNullIteratorAnd(t *testing.T) { + all := NewInt64AllIterator(1, 3) + null := NewNullIterator() + a := NewAndIterator() + a.AddSubIterator(all) + a.AddSubIterator(null) + newIt, changed := a.Optimize() + if !changed { + t.Error("Didn't change") + } + if newIt.Type() != "null" { + t.Error("Expected null iterator, got ", newIt.Type()) + } +} + +func TestReorderWithTag(t *testing.T) { + all := NewInt64AllIterator(100, 300) + all.AddTag("good") + all2 := NewInt64AllIterator(1, 30000) + all2.AddTag("slow") + a := NewAndIterator() + // Make all2 the default iterator + a.AddSubIterator(all2) + a.AddSubIterator(all) + + newIt, changed := a.Optimize() + if !changed { + t.Error("Expected new iterator") + } + expectedTags := []string{"good", "slow"} + tagsOut := make([]string, 0) + l := newIt.GetSubIterators() + for e := l.Front(); e != nil; e = e.Next() { + for _, x := range e.Value.(Iterator).Tags() { + tagsOut = append(tagsOut, x) + } + } + if !reflect.DeepEqual(expectedTags, tagsOut) { + t.Fatal("Tags don't match") + } +} + +func TestAndStatistics(t *testing.T) { + all := NewInt64AllIterator(100, 300) + all.AddTag("good") + all2 := NewInt64AllIterator(1, 30000) + all2.AddTag("slow") + a := NewAndIterator() + // Make all2 the default iterator + a.AddSubIterator(all2) + a.AddSubIterator(all) + stats1 := a.GetStats() + newIt, changed := 
a.Optimize() + if !changed { + t.Error("Didn't optimize") + } + stats2 := newIt.GetStats() + if stats2.NextCost > stats1.NextCost { + t.Error("And didn't optimize. Next cost old ", stats1.NextCost, "and new ", stats2.NextCost) + } +} diff --git a/graph/and-iterator.go b/graph/and-iterator.go new file mode 100644 index 0000000..a3458aa --- /dev/null +++ b/graph/and-iterator.go @@ -0,0 +1,248 @@ +// Defines the And iterator, one of the base iterators. And requires no +// knowledge of the constituent TripleStore; its sole purpose is to act as an +// intersection operator across the subiterators it is given. If one iterator +// contains [1,3,5] and another [2,3,4] -- then And is an iterator that +// 'contains' [3] +// +// It accomplishes this in one of two ways. If it is a Next()ed iterator (that +// is, it is a top level iterator, or on the "Next() path", then it will Next() +// it's primary iterator (helpfully, and.primary_it) and Check() the resultant +// value against it's other iterators. If it matches all of them, then it +// returns that value. Otherwise, it repeats the process. +// +// If it's on a Check() path, it merely Check()s every iterator, and returns the +// logical AND of each result. + +package graph + +import ( + "container/list" + "fmt" + "strings" +) + +// The And iterator. Consists of a BaseIterator and a number of subiterators, the primary of which will +// be Next()ed if next is called. +type AndIterator struct { + BaseIterator + internalIterators []Iterator + itCount int + primaryIt Iterator + checkList *list.List +} + +// Creates a new And iterator. 
+func NewAndIterator() *AndIterator { + var and AndIterator + BaseIteratorInit(&and.BaseIterator) + and.internalIterators = make([]Iterator, 0, 20) + and.checkList = nil + return &and +} + +// Reset all internal iterators +func (and *AndIterator) Reset() { + and.primaryIt.Reset() + for _, it := range and.internalIterators { + it.Reset() + } + and.checkList = nil +} + +func (and *AndIterator) Clone() Iterator { + newAnd := NewAndIterator() + newAnd.AddSubIterator(and.primaryIt.Clone()) + newAnd.CopyTagsFrom(and) + for _, it := range and.internalIterators { + newAnd.AddSubIterator(it.Clone()) + } + if and.checkList != nil { + newAnd.optimizeCheck() + } + return newAnd +} + +// Returns a list.List of the subiterators, in order (primary iterator first). +func (and *AndIterator) GetSubIterators() *list.List { + l := list.New() + l.PushBack(and.primaryIt) + for _, it := range and.internalIterators { + l.PushBack(it) + } + return l +} + +// Overrides BaseIterator TagResults, as it needs to add it's own results and +// recurse down it's subiterators. +func (and *AndIterator) TagResults(out *map[string]TSVal) { + and.BaseIterator.TagResults(out) + if and.primaryIt != nil { + and.primaryIt.TagResults(out) + } + for _, it := range and.internalIterators { + it.TagResults(out) + } +} + +// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. +func (and *AndIterator) GetResultTree() *ResultTree { + tree := NewResultTree(and.LastResult()) + tree.AddSubtree(and.primaryIt.GetResultTree()) + for _, it := range and.internalIterators { + tree.AddSubtree(it.GetResultTree()) + } + return tree +} + +// Prints information about this iterator. 
+func (and *AndIterator) DebugString(indent int) string { + var total string + for i, it := range and.internalIterators { + total += strings.Repeat(" ", indent+2) + total += fmt.Sprintf("%d:\n%s\n", i, it.DebugString(indent+4)) + } + var tags string + for _, k := range and.Tags() { + tags += fmt.Sprintf("%s;", k) + } + spaces := strings.Repeat(" ", indent+2) + + return fmt.Sprintf("%s(%s %d\n%stags:%s\n%sprimary_it:\n%s\n%sother_its:\n%s)", + strings.Repeat(" ", indent), + and.Type(), + and.GetUid(), + spaces, + tags, + spaces, + and.primaryIt.DebugString(indent+4), + spaces, + total) +} + +// Add a subiterator to this And iterator. +// +// The first iterator that is added becomes the primary iterator. This is +// important. Calling Optimize() is the way to change the order based on +// subiterator statistics. Without Optimize(), the order added is the order +// used. +func (and *AndIterator) AddSubIterator(sub Iterator) { + if and.itCount > 0 { + and.internalIterators = append(and.internalIterators, sub) + and.itCount++ + return + } + and.primaryIt = sub + and.itCount++ +} + +// Returns the Next value from the And iterator. Because the And is the +// intersection of its subiterators, it must choose one subiterator to produce a +// candidate, and check this value against the subiterators. A productive choice +// of primary iterator is therefore very important. +func (and *AndIterator) Next() (TSVal, bool) { + NextLogIn(and) + var curr TSVal + var exists bool + for { + + curr, exists = and.primaryIt.Next() + if !exists { + return NextLogOut(and, nil, false) + } + if and.checkSubIts(curr) { + and.Last = curr + return NextLogOut(and, curr, true) + } + } + panic("Somehow broke out of Next() loop in AndIterator") +} + +// Checks a value against the non-primary iterators, in order. 
+func (and *AndIterator) checkSubIts(val TSVal) bool { + var subIsGood = true + for _, it := range and.internalIterators { + subIsGood = it.Check(val) + if !subIsGood { + break + } + } + return subIsGood +} + +func (and *AndIterator) checkCheckList(val TSVal) bool { + var isGood = true + for e := and.checkList.Front(); e != nil; e = e.Next() { + isGood = e.Value.(Iterator).Check(val) + if !isGood { + break + } + } + return CheckLogOut(and, val, isGood) +} + +// Check a value against the entire iterator, in order. +func (and *AndIterator) Check(val TSVal) bool { + CheckLogIn(and, val) + if and.checkList != nil { + return and.checkCheckList(val) + } + mainGood := and.primaryIt.Check(val) + if !mainGood { + return CheckLogOut(and, val, false) + } + othersGood := and.checkSubIts(val) + if !othersGood { + return CheckLogOut(and, val, false) + } + and.Last = val + return CheckLogOut(and, val, true) +} + +// Returns the approximate size of the And iterator. Because we're dealing +// with an intersection, we know that the largest we can be is the size of the +// smallest iterator. This is the heuristic we shall follow. Better heuristics +// welcome. +func (and *AndIterator) Size() (int64, bool) { + val, b := and.primaryIt.Size() + for _, it := range and.internalIterators { + newval, newb := it.Size() + if val > newval { + val = newval + } + b = newb && b + } + return val, b +} + +// An And has no NextResult of its own -- that is, there are no other values +// which satisfy our previous result that are not the result itself. Our +// subiterators might, however, so just pass the call recursively. +func (and *AndIterator) NextResult() bool { + if and.primaryIt.NextResult() { + return true + } + for _, it := range and.internalIterators { + if it.NextResult() { + return true + } + } + return false +} + +// Perform and-specific cleanup, of which there currently is none. +func (and *AndIterator) cleanUp() { +} + +// Close this iterator, and, by extension, close the subiterators. 
+// Close should be idempotent, and it follows that if it's subiterators +// follow this contract, the And follows the contract. +func (and *AndIterator) Close() { + and.cleanUp() + and.primaryIt.Close() + for _, it := range and.internalIterators { + it.Close() + } +} + +// Register this as an "and" iterator. +func (and *AndIterator) Type() string { return "and" } diff --git a/graph/and-iterator_test.go b/graph/and-iterator_test.go new file mode 100644 index 0000000..d0fbf2e --- /dev/null +++ b/graph/and-iterator_test.go @@ -0,0 +1,147 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "testing" +) + +// Make sure that tags work on the And. +func TestTag(t *testing.T) { + fix1 := newFixedIterator() + fix1.AddValue(234) + fix1.AddTag("foo") + and := NewAndIterator() + and.AddSubIterator(fix1) + and.AddTag("bar") + out := fix1.Tags() + if len(out) != 1 { + t.Errorf("Expected length 1, got %d", len(out)) + } + if out[0] != "foo" { + t.Errorf("Cannot get tag back, got %s", out[0]) + } + + val, ok := and.Next() + if !ok { + t.Errorf("And did not next") + } + if val != 234 { + t.Errorf("Unexpected value") + } + tags := make(map[string]TSVal) + and.TagResults(&tags) + if tags["bar"] != 234 { + t.Errorf("no bar tag") + } + if tags["foo"] != 234 { + t.Errorf("no foo tag") + } +} + +// Do a simple itersection of fixed values. 
+func TestAndAndFixedIterators(t *testing.T) { + fix1 := newFixedIterator() + fix1.AddValue(1) + fix1.AddValue(2) + fix1.AddValue(3) + fix1.AddValue(4) + fix2 := newFixedIterator() + fix2.AddValue(3) + fix2.AddValue(4) + fix2.AddValue(5) + and := NewAndIterator() + and.AddSubIterator(fix1) + and.AddSubIterator(fix2) + // Should be as big as smallest subiterator + size, accurate := and.Size() + if size != 3 { + t.Error("Incorrect size") + } + if !accurate { + t.Error("not accurate") + } + + val, ok := and.Next() + if val != 3 || ok == false { + t.Error("Incorrect first value") + } + + val, ok = and.Next() + if val != 4 || ok == false { + t.Error("Incorrect second value") + } + + val, ok = and.Next() + if ok { + t.Error("Too many values") + } + +} + +// If there's no intersection, the size should still report the same, +// but there should be nothing to Next() +func TestNonOverlappingFixedIterators(t *testing.T) { + fix1 := newFixedIterator() + fix1.AddValue(1) + fix1.AddValue(2) + fix1.AddValue(3) + fix1.AddValue(4) + fix2 := newFixedIterator() + fix2.AddValue(5) + fix2.AddValue(6) + fix2.AddValue(7) + and := NewAndIterator() + and.AddSubIterator(fix1) + and.AddSubIterator(fix2) + // Should be as big as smallest subiterator + size, accurate := and.Size() + if size != 3 { + t.Error("Incorrect size") + } + if !accurate { + t.Error("not accurate") + } + + _, ok := and.Next() + if ok { + t.Error("Too many values") + } + +} + +func TestAllIterators(t *testing.T) { + all1 := NewInt64AllIterator(1, 5) + all2 := NewInt64AllIterator(4, 10) + and := NewAndIterator() + and.AddSubIterator(all2) + and.AddSubIterator(all1) + + val, ok := and.Next() + if val.(int64) != 4 || ok == false { + t.Error("Incorrect first value") + } + + val, ok = and.Next() + if val.(int64) != 5 || ok == false { + t.Error("Incorrect second value") + } + + val, ok = and.Next() + if ok { + t.Error("Too many values") + } + +} diff --git a/graph/fixed-iterator.go b/graph/fixed-iterator.go new file mode 
100644 index 0000000..7578611 --- /dev/null +++ b/graph/fixed-iterator.go @@ -0,0 +1,157 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Defines one of the base iterators, the Fixed iterator. A fixed iterator is quite simple; it +// contains an explicit fixed array of values. +// +// A fixed iterator requires an Equality function to be passed to it, by reason that TSVal, the +// opaque Triple store value, may not answer to ==. + +import ( + "fmt" + "strings" +) + +// A Fixed iterator consists of it's values, an index (where it is in the process of Next()ing) and +// an equality function. +type FixedIterator struct { + BaseIterator + values []TSVal + lastIndex int + cmp Equality +} + +// Define the signature of an equality function. +type Equality func(a, b TSVal) bool + +// Define an equality function of purely ==, which works for native types. +func BasicEquality(a, b TSVal) bool { + if a == b { + return true + } + return false +} + +// Creates a new Fixed iterator based around == equality. +func newFixedIterator() *FixedIterator { + return NewFixedIteratorWithCompare(BasicEquality) +} + +// Creates a new Fixed iterator with a custom comparitor. 
+func NewFixedIteratorWithCompare(compareFn Equality) *FixedIterator { + var it FixedIterator + BaseIteratorInit(&it.BaseIterator) + it.values = make([]TSVal, 0, 20) + it.lastIndex = 0 + it.cmp = compareFn + return &it +} + +func (f *FixedIterator) Reset() { + f.lastIndex = 0 +} + +func (f *FixedIterator) Close() { +} + +func (f *FixedIterator) Clone() Iterator { + out := NewFixedIteratorWithCompare(f.cmp) + for _, val := range f.values { + out.AddValue(val) + } + out.CopyTagsFrom(f) + return out +} + +// Add a value to the iterator. The array now contains this value. +// TODO(barakmich): This ought to be a set someday, disallowing repeated values. +func (f *FixedIterator) AddValue(v TSVal) { + f.values = append(f.values, v) +} + +// Print some information about the iterator. +func (f *FixedIterator) DebugString(indent int) string { + value := "" + if len(f.values) > 0 { + value = fmt.Sprint(f.values[0]) + } + return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", + strings.Repeat(" ", indent), + f.Type(), + f.FixedTags(), + len(f.values), + value, + ) +} + +// Register this iterator as a Fixed iterator. +func (f *FixedIterator) Type() string { + return "fixed" +} + +// Check if the passed value is equal to one of the values stored in the iterator. +func (f *FixedIterator) Check(v TSVal) bool { + // Could be optimized by keeping it sorted or using a better datastructure. + // However, for fixed iterators, which are by definition kind of tiny, this + // isn't a big issue. + CheckLogIn(f, v) + for _, x := range f.values { + if f.cmp(x, v) { + f.Last = x + return CheckLogOut(f, v, true) + } + } + return CheckLogOut(f, v, false) +} + +// Return the next stored value from the iterator. +func (f *FixedIterator) Next() (TSVal, bool) { + NextLogIn(f) + if f.lastIndex == len(f.values) { + return NextLogOut(f, nil, false) + } + out := f.values[f.lastIndex] + f.Last = out + f.lastIndex++ + return NextLogOut(f, out, true) +} + +// Optimize() for a Fixed iterator is simple. 
Returns a Null iterator if it's empty +// (so that other iterators upstream can treat this as null) or there is no +// optimization. +func (f *FixedIterator) Optimize() (Iterator, bool) { + + if len(f.values) == 1 && f.values[0] == nil { + return &NullIterator{}, true + } + + return f, false +} + +// Size is the number of values stored. +func (f *FixedIterator) Size() (int64, bool) { + return int64(len(f.values)), true +} + +// As we right now have to scan the entire list, Next and Check are linear with the +// size. However, a better data structure could remove these limits. +func (a *FixedIterator) GetStats() *IteratorStats { + return &IteratorStats{ + CheckCost: int64(len(a.values)), + NextCost: int64(len(a.values)), + Size: int64(len(a.values)), + } +} diff --git a/graph/hasa-iterator.go b/graph/hasa-iterator.go new file mode 100644 index 0000000..362b96d --- /dev/null +++ b/graph/hasa-iterator.go @@ -0,0 +1,224 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Defines one of the base iterators, the HasA iterator. The HasA takes a +// subiterator of links, and acts as an iterator of nodes in the given +// direction. The name comes from the idea that a "link HasA subject" or a "link +// HasA predicate". +// +// HasA is weird in that it may return the same value twice if on the Next() +// path. 
That's okay -- in reality, it can be viewed as returning the value for +// a new triple, but to make logic much simpler, here we have the HasA. +// +// Likewise, it's important to think about Check()ing a HasA. When given a +// value to check, it means "Check all predicates that have this value for your +// direction against the subiterator." This would imply that there's more than +// one possibility for the same Check()ed value. While we could return the +// number of options, it's simpler to return one, and then call NextResult() +// enough times to enumerate the options. (In fact, one could argue that the +// raison d'etre for NextResult() is this iterator). +// +// Alternatively, can be seen as the dual of the LinksTo iterator. + +import ( + "container/list" + "fmt" + "github.com/barakmich/glog" + "strings" +) + +// A HasaIterator consists of a reference back to the TripleStore that it references, +// a primary subiterator, a direction in which the triples for that subiterator point, +// and a temporary holder for the iterator generated on Check(). +type HasaIterator struct { + BaseIterator + ts TripleStore + primaryIt Iterator + direction string + resultIt Iterator +} + +// Construct a new HasA iterator, given the triple subiterator, and the triple +// direction for which it stands. +func NewHasaIterator(ts TripleStore, subIt Iterator, dir string) *HasaIterator { + var hasa HasaIterator + BaseIteratorInit(&hasa.BaseIterator) + hasa.ts = ts + hasa.primaryIt = subIt + hasa.direction = dir + return &hasa +} + +// Return our sole subiterator, in a list.List. +func (h *HasaIterator) GetSubIterators() *list.List { + l := list.New() + l.PushBack(h.primaryIt) + return l +} + +func (h *HasaIterator) Reset() { + h.primaryIt.Reset() + if h.resultIt != nil { + h.resultIt.Close() + } +} + +func (h *HasaIterator) Clone() Iterator { + out := NewHasaIterator(h.ts, h.primaryIt.Clone(), h.direction) + out.CopyTagsFrom(h) + return out +} + +// Direction accessor. 
+func (h *HasaIterator) Direction() string { return h.direction } + +// Pass the Optimize() call along to the subiterator. If it becomes Null, +// then the HasA becomes Null (there are no triples that have any directions). +func (h *HasaIterator) Optimize() (Iterator, bool) { + + newPrimary, changed := h.primaryIt.Optimize() + if changed { + h.primaryIt = newPrimary + if h.primaryIt.Type() == "null" { + return h.primaryIt, true + } + } + return h, false +} + +// Pass the TagResults down the chain. +func (h *HasaIterator) TagResults(out *map[string]TSVal) { + h.BaseIterator.TagResults(out) + h.primaryIt.TagResults(out) +} + +// DEPRECATED Return results in a ResultTree. +func (h *HasaIterator) GetResultTree() *ResultTree { + tree := NewResultTree(h.LastResult()) + tree.AddSubtree(h.primaryIt.GetResultTree()) + return tree +} + +// Print some information about this iterator. +func (h *HasaIterator) DebugString(indent int) string { + var tags string + for _, k := range h.Tags() { + tags += fmt.Sprintf("%s;", k) + } + return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", strings.Repeat(" ", indent), h.Type(), h.GetUid(), tags, h.direction, h.primaryIt.DebugString(indent+4)) +} + +// Check a value against our internal iterator. In order to do this, we must first open a new +// iterator of "triples that have `val` in our direction", given to us by the triple store, +// and then Next() values out of that iterator and Check() them against our subiterator. 
+func (h *HasaIterator) Check(val TSVal) bool { + CheckLogIn(h, val) + if glog.V(4) { + glog.V(4).Infoln("Id is", h.ts.GetNameFor(val)) + } + // TODO(barakmich): Optimize this + if h.resultIt != nil { + h.resultIt.Close() + } + h.resultIt = h.ts.GetTripleIterator(h.direction, val) + return CheckLogOut(h, val, h.GetCheckResult()) +} + +// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the +// result iterator (a triple iterator based on the last checked value) and returns true if +// another match is made. +func (h *HasaIterator) GetCheckResult() bool { + for { + linkVal, ok := h.resultIt.Next() + if !ok { + break + } + if glog.V(4) { + glog.V(4).Infoln("Triple is", h.ts.GetTriple(linkVal).ToString()) + } + if h.primaryIt.Check(linkVal) { + h.Last = h.ts.GetTripleDirection(linkVal, h.direction) + return true + } + } + return false +} + +// Get the next result that matches this branch. +func (h *HasaIterator) NextResult() bool { + // Order here is important. If the subiterator has a NextResult, then we + // need do nothing -- there is a next result, and we shouldn't move forward. + // However, we then need to get the next result from our last Check(). + // + // The upshot is, the end of NextResult() bubbles up from the bottom of the + // iterator tree up, and we need to respect that. + if h.primaryIt.NextResult() { + return true + } + return h.GetCheckResult() +} + +// Get the next result from this iterator. This is simpler than Check. We have a +// subiterator we can get a value from, and we can take that resultant triple, +// pull our direction out of it, and return that. 
+func (h *HasaIterator) Next() (TSVal, bool) { + NextLogIn(h) + if h.resultIt != nil { + h.resultIt.Close() + } + h.resultIt = &NullIterator{} + + tID, ok := h.primaryIt.Next() + if !ok { + return NextLogOut(h, 0, false) + } + name := h.ts.GetTriple(tID).Get(h.direction) + val := h.ts.GetIdFor(name) + h.Last = val + return NextLogOut(h, val, true) +} + +// GetStats() returns the statistics on the HasA iterator. This is curious. Next +// cost is easy, it's an extra call or so on top of the subiterator Next cost. +// CheckCost involves going to the TripleStore, iterating out values, and hoping +// one sticks -- potentially expensive, depending on fanout. Size, however, is +// potentially smaller. we know at worst it's the size of the subiterator, but +// if there are many repeated values, it could be much smaller in totality. +func (h *HasaIterator) GetStats() *IteratorStats { + subitStats := h.primaryIt.GetStats() + // TODO(barakmich): These should really come from the triplestore itself + // and be optimized. + faninFactor := int64(1) + fanoutFactor := int64(30) + nextConstant := int64(2) + tripleConstant := int64(1) + return &IteratorStats{ + NextCost: tripleConstant + subitStats.NextCost, + CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost, + Size: faninFactor * subitStats.Size, + } +} + +// Close the subiterator, the result iterator (if any) and the HasA. +func (h *HasaIterator) Close() { + if h.resultIt != nil { + h.resultIt.Close() + } + h.primaryIt.Close() +} + +// Register this iterator as a HasA. +func (h *HasaIterator) Type() string { return "hasa" } diff --git a/graph/iterator.go b/graph/iterator.go new file mode 100644 index 0000000..7aa25bc --- /dev/null +++ b/graph/iterator.go @@ -0,0 +1,304 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package graph

// Define the general iterator interface, as well as the BaseIterator which all
// iterators can "inherit" from to get default iterator functionality.

import (
	"container/list"
	"fmt"
	"github.com/barakmich/glog"
	"strings"
)

// iterator_n is the value BaseIteratorInit copies into each new iterator's
// uid. It is only incremented while glog.V(2) is enabled, so without that
// logging level every iterator receives the same uid.
var iterator_n int = 0

// Iterator is the interface every iterator in the graph package implements.
type Iterator interface {
	// Tags are the way we handle results. By adding a tag to an iterator, we can
	// "name" it, in a sense, and at each step of iteration, get a named result.
	// TagResults() is therefore the handy way of walking an iterator tree and
	// getting the named results.
	//
	// Tag Accessors.
	AddTag(string)
	Tags() []string
	AddFixedTag(string, TSVal)
	FixedTags() map[string]TSVal
	CopyTagsFrom(Iterator)
	// Fills a tag-to-result-value map.
	TagResults(*map[string]TSVal)
	// Returns the current result.
	LastResult() TSVal
	// DEPRECATED -- Fills a ResultTree struct with Result().
	GetResultTree() *ResultTree

	// These methods are the heart and soul of the iterator, as they constitute
	// the iteration interface.
	//
	// To get the full results of iteration, do the following:
	//   while Next() returns true:
	//     emit result
	//     while NextResult() returns true:
	//       emit result
	//
	// All of them should set iterator.Last to be the last returned value, to
	// make results work.
	//
	// Next() advances the iterator and returns the next valid result. Returns
	// (value, true) or (nil, false)
	Next() (TSVal, bool)
	// NextResult() advances iterators that may have more than one valid result,
	// from the bottom up.
	NextResult() bool
	// Check(), given a value, returns whether or not that value is within the set
	// held by this iterator.
	Check(TSVal) bool
	// Start iteration from the beginning
	Reset()
	// Create a new iterator just like this one
	Clone() Iterator
	// These methods relate to choosing the right iterator, or optimizing an
	// iterator tree
	//
	// GetStats() returns the relative costs of calling the iteration methods for
	// this iterator, as well as the size. Roughly, it will take NextCost * Size
	// "cost units" to get everything out of the iterator. This is a wibbly-wobbly
	// thing, and not exact, but a useful heuristic.
	GetStats() *IteratorStats
	// Helpful accessor for the number of things in the iterator. The first return
	// value is the size, and the second return value is whether that number is exact,
	// or a conservative estimate.
	Size() (int64, bool)
	// Returns a string relating to what the function of the iterator is. By
	// knowing the names of the iterators, we can devise optimization strategies.
	Type() string
	// Optimizes an iterator. Can replace the iterator, or merely move things
	// around internally. If it chooses to replace it with a better iterator,
	// returns (the new iterator, true), if not, it returns (self, false).
	Optimize() (Iterator, bool)
	// Return a list of the subiterators for this iterator.
	GetSubIterators() *list.List

	// Return a string representation of the iterator, indented by the given amount.
	DebugString(int) string
	// Return whether this iterator is reliably nextable. Most iterators are.
	// However, some iterators, like "not" are, by definition, the whole database
	// except themselves. Next() on these is unproductive, if impossible.
	Nextable() bool
	// Close the iterator and do internal cleanup.
	Close()
	// Returns the uid stored on the iterator; the logging helpers and
	// DebugString() implementations print it.
	GetUid() int
}

// IteratorStats carries the heuristic figures returned by GetStats(): the
// relative cost of one Check() call, the relative cost of one Next() call,
// and the size of the iterator.
type IteratorStats struct {
	CheckCost int64
	NextCost  int64
	Size      int64
}

// The Base iterator is the iterator other iterators inherit from to get some
// default functionality.
type BaseIterator struct {
	// Last is the value returned by LastResult().
	Last TSVal
	// tags are the names added with AddTag().
	tags []string
	// fixedTags maps names added with AddFixedTag() to their fixed values.
	fixedTags map[string]TSVal
	// nextable is the value returned by Nextable().
	nextable bool
	// uid is the value returned by GetUid().
	uid int
}

// Called by subclasses. Marks the iterator as nextable and assigns its uid
// from iterator_n.
func BaseIteratorInit(b *BaseIterator) {
	// Your basic iterator is nextable
	b.nextable = true
	b.uid = iterator_n
	// The counter only advances while glog.V(2) is enabled.
	if glog.V(2) {
		iterator_n++
	}
}

// Returns the uid assigned by BaseIteratorInit.
func (b *BaseIterator) GetUid() int {
	return b.uid
}

// Adds a tag to the iterator. Most iterators don't need to override.
func (b *BaseIterator) AddTag(tag string) {
	if b.tags == nil {
		b.tags = make([]string, 0)
	}
	b.tags = append(b.tags, tag)
}

// Adds a fixed tag: a name bound to the given value. The map is created on
// first use; adding the same name again overwrites the earlier value.
func (b *BaseIterator) AddFixedTag(tag string, value TSVal) {
	if b.fixedTags == nil {
		b.fixedTags = make(map[string]TSVal)
	}
	b.fixedTags[tag] = value
}

// Returns the tags.
func (b *BaseIterator) Tags() []string {
	return b.tags
}

// Returns the fixed tags (nil if none were ever added).
func (b *BaseIterator) FixedTags() map[string]TSVal {
	return b.fixedTags
}

// Adds every tag and every fixed tag of the other iterator to this one.
func (b *BaseIterator) CopyTagsFrom(other_it Iterator) {
	for _, tag := range other_it.Tags() {
		b.AddTag(tag)
	}

	for k, v := range other_it.FixedTags() {
		b.AddFixedTag(k, v)
	}

}

// Prints a silly debug string. Most classes override.
func (n *BaseIterator) DebugString(indent int) string {
	return fmt.Sprintf("%s(base)", strings.Repeat(" ", indent))
}

// Nothing in a base iterator.
func (n *BaseIterator) Check(v TSVal) bool {
	return false
}

// Base iterators should never appear in a tree; if they are, select against
// them. Every figure is set to the same large value.
func (n *BaseIterator) GetStats() *IteratorStats {
	return &IteratorStats{100000, 100000, 100000}
}

// DEPRECATED Returns a ResultTree holding only this iterator's last result.
func (b *BaseIterator) GetResultTree() *ResultTree {
	tree := NewResultTree(b.LastResult())
	return tree
}

// Nothing in a base iterator.
+func (n *BaseIterator) Next() (TSVal, bool) { + return nil, false +} + +func (n *BaseIterator) NextResult() bool { + return false +} + +// Returns the last result of an iterator. +func (n *BaseIterator) LastResult() TSVal { + return n.Last +} + +// If you're empty and you know it, clap your hands. +func (n *BaseIterator) Size() (int64, bool) { + return 0, true +} + +// No subiterators. Only those with subiterators need to do anything here. +func (n *BaseIterator) GetSubIterators() *list.List { + return nil +} + +// Accessor +func (b *BaseIterator) Nextable() bool { return b.nextable } + +// Fill the map based on the tags assigned to this iterator. Default +// functionality works well for most iterators. +func (a *BaseIterator) TagResults(out_map *map[string]TSVal) { + for _, tag := range a.Tags() { + (*out_map)[tag] = a.LastResult() + } + + for tag, value := range a.FixedTags() { + (*out_map)[tag] = value + } +} + +// Nothing to clean up. +//func (a *BaseIterator) Close() {} +func (a *NullIterator) Close() {} + +func (a *BaseIterator) Reset() {} + +// Here we define the simplest base iterator -- the Null iterator. It contains nothing. +// It is the empty set. Often times, queries that contain one of these match nothing, +// so it's important to give it a special iterator. +type NullIterator struct { + BaseIterator +} + +// Fairly useless New function. +func NewNullIterator() *NullIterator { + var n NullIterator + return &n +} + +func (n *NullIterator) Clone() Iterator { return NewNullIterator() } + +// Name the null iterator. +func (n *NullIterator) Type() string { return "null" } + +// A good iterator will close itself when it returns true. +// Null has nothing it needs to do. +func (n *NullIterator) Optimize() (Iterator, bool) { return n, false } + +// Print the null iterator. +func (n *NullIterator) DebugString(indent int) string { + return strings.Repeat(" ", indent) + "(null)" +} + +// A null iterator costs nothing. Use it! 
+func (n *NullIterator) GetStats() *IteratorStats { + return &IteratorStats{0, 0, 0} +} + +// Utility logging functions for when an iterator gets called Next upon, or Check upon, as +// well as what they return. Highly useful for tracing the execution path of a query. +func CheckLogIn(it Iterator, val TSVal) { + if glog.V(4) { + glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type()), it.GetUid(), val) + } +} + +func CheckLogOut(it Iterator, val TSVal, good bool) bool { + if glog.V(4) { + if good { + glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type()), it.GetUid(), val) + } else { + glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type()), it.GetUid(), val) + } + } + return good +} + +func NextLogIn(it Iterator) { + if glog.V(4) { + glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type()), it.GetUid()) + } +} + +func NextLogOut(it Iterator, val TSVal, ok bool) (TSVal, bool) { + if glog.V(4) { + if ok { + glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type()), it.GetUid(), val) + } else { + glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type()), it.GetUid()) + } + } + return val, ok +} diff --git a/graph/linksto-iterator.go b/graph/linksto-iterator.go new file mode 100644 index 0000000..45c5e2f --- /dev/null +++ b/graph/linksto-iterator.go @@ -0,0 +1,184 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package graph + +// Defines one of the base iterators, the LinksTo iterator. A LinksTo takes a +// subiterator of nodes, and contains an iteration of links which "link to" +// those nodes in a given direction. +// +// Next()ing a LinksTo is straightforward -- iterate through all links to // +// things in the subiterator, and then advance the subiterator, and do it again. +// LinksTo is therefore sensitive to growing with a fanout. (A small-sized +// subiterator could cause LinksTo to be large). +// +// Check()ing a LinksTo means, given a link, take the direction we care about +// and check if it's in our subiterator. Checking is therefore fairly cheap, and +// similar to checking the subiterator alone. +// +// Can be seen as the dual of the HasA iterator. + +import ( + "container/list" + "fmt" + "strings" +) + +// A LinksTo has a reference back to the TripleStore (to create the iterators +// for each node) the subiterator, and the direction the iterator comes from. +// `next_it` is the tempoarary iterator held per result in `primary_it`. +type LinksToIterator struct { + BaseIterator + ts TripleStore + primaryIt Iterator + direction string + nextIt Iterator +} + +// Construct a new LinksTo iterator around a direction and a subiterator of +// nodes. +func NewLinksToIterator(ts TripleStore, it Iterator, dir string) *LinksToIterator { + var lto LinksToIterator + BaseIteratorInit(<o.BaseIterator) + lto.ts = ts + lto.primaryIt = it + lto.direction = dir + lto.nextIt = &NullIterator{} + return <o +} + +func (l *LinksToIterator) Reset() { + l.primaryIt.Reset() + if l.nextIt != nil { + l.nextIt.Close() + } + l.nextIt = &NullIterator{} +} + +func (l *LinksToIterator) Clone() Iterator { + out := NewLinksToIterator(l.ts, l.primaryIt.Clone(), l.direction) + out.CopyTagsFrom(l) + return out +} + +// Return the direction under consideration. +func (l *LinksToIterator) Direction() string { return l.direction } + +// Tag these results, and our subiterator's results. 
+func (l *LinksToIterator) TagResults(out *map[string]TSVal) { + l.BaseIterator.TagResults(out) + l.primaryIt.TagResults(out) +} + +// DEPRECATED +func (l *LinksToIterator) GetResultTree() *ResultTree { + tree := NewResultTree(l.LastResult()) + tree.AddSubtree(l.primaryIt.GetResultTree()) + return tree +} + +// Print the iterator. +func (l *LinksToIterator) DebugString(indent int) string { + return fmt.Sprintf("%s(%s %d direction:%s\n%s)", + strings.Repeat(" ", indent), + l.Type(), l.GetUid(), l.direction, l.primaryIt.DebugString(indent+4)) +} + +// If it checks in the right direction for the subiterator, it is a valid link +// for the LinksTo. +func (l *LinksToIterator) Check(val TSVal) bool { + CheckLogIn(l, val) + node := l.ts.GetTripleDirection(val, l.direction) + if l.primaryIt.Check(node) { + l.Last = val + return CheckLogOut(l, val, true) + } + return CheckLogOut(l, val, false) +} + +// Return a list containing only our subiterator. +func (lto *LinksToIterator) GetSubIterators() *list.List { + l := list.New() + l.PushBack(lto.primaryIt) + return l +} + +// Optimize the LinksTo, by replacing it if it can be. +func (lto *LinksToIterator) Optimize() (Iterator, bool) { + newPrimary, changed := lto.primaryIt.Optimize() + if changed { + lto.primaryIt = newPrimary + if lto.primaryIt.Type() == "null" { + lto.nextIt.Close() + return lto.primaryIt, true + } + } + // Ask the TripleStore if we can be replaced. Often times, this is a great + // optimization opportunity (there's a fixed iterator underneath us, for + // example). + newReplacement, hasOne := lto.ts.OptimizeIterator(lto) + if hasOne { + lto.Close() + return newReplacement, true + } + return lto, false +} + +// Next()ing a LinksTo operates as described above. 
+func (l *LinksToIterator) Next() (TSVal, bool) { + NextLogIn(l) + val, ok := l.nextIt.Next() + if !ok { + // Subiterator is empty, get another one + candidate, ok := l.primaryIt.Next() + if !ok { + // We're out of nodes in our subiterator, so we're done as well. + return NextLogOut(l, 0, false) + } + l.nextIt.Close() + l.nextIt = l.ts.GetTripleIterator(l.direction, candidate) + // Recurse -- return the first in the next set. + return l.Next() + } + l.Last = val + return NextLogOut(l, val, ok) +} + +// Close our subiterators. +func (l *LinksToIterator) Close() { + l.nextIt.Close() + l.primaryIt.Close() +} + +// We won't ever have a new result, but our subiterators might. +func (l *LinksToIterator) NextResult() bool { + return l.primaryIt.NextResult() +} + +// Register the LinksTo. +func (l *LinksToIterator) Type() string { return "linksto" } + +// Return a guess as to how big or costly it is to next the iterator. +func (l *LinksToIterator) GetStats() *IteratorStats { + subitStats := l.primaryIt.GetStats() + // TODO(barakmich): These should really come from the triplestore itself + fanoutFactor := int64(20) + checkConstant := int64(1) + nextConstant := int64(2) + return &IteratorStats{ + NextCost: nextConstant + subitStats.NextCost, + CheckCost: checkConstant + subitStats.CheckCost, + Size: fanoutFactor * subitStats.Size, + } +} diff --git a/graph/linksto-iterator_test.go b/graph/linksto-iterator_test.go new file mode 100644 index 0000000..06cdd2a --- /dev/null +++ b/graph/linksto-iterator_test.go @@ -0,0 +1,37 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "testing" +) + +func TestLinksTo(t *testing.T) { + ts := new(TestTripleStore) + tsFixed := newFixedIterator() + tsFixed.AddValue(2) + ts.On("GetIdFor", "cool").Return(1) + ts.On("GetTripleIterator", "o", 1).Return(tsFixed) + fixed := newFixedIterator() + fixed.AddValue(ts.GetIdFor("cool")) + lto := NewLinksToIterator(ts, fixed, "o") + val, ok := lto.Next() + if !ok { + t.Error("At least one triple matches the fixed object") + } + if val != 2 { + t.Errorf("Triple index 2, such as %s, should match %s", ts.GetTriple(2), ts.GetTriple(val)) + } +} diff --git a/graph/memstore/llrb-iterator.go b/graph/memstore/llrb-iterator.go new file mode 100644 index 0000000..692a3c6 --- /dev/null +++ b/graph/memstore/llrb-iterator.go @@ -0,0 +1,119 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package memstore + +import ( + "fmt" + "math" + "strings" + + "github.com/petar/GoLLRB/llrb" + + "github.com/google/cayley/graph" +) + +type LlrbIterator struct { + graph.BaseIterator + tree *llrb.LLRB + data string + isRunning bool + iterLast Int64 +} + +type Int64 int64 + +func (i Int64) Less(than llrb.Item) bool { + return i < than.(Int64) +} + +func IterateOne(tree *llrb.LLRB, last Int64) Int64 { + var next Int64 + tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool { + if i.(Int64) == last { + return true + } else { + next = i.(Int64) + return false + } + }) + return next +} + +func NewLlrbIterator(tree *llrb.LLRB, data string) *LlrbIterator { + var it LlrbIterator + graph.BaseIteratorInit(&it.BaseIterator) + it.tree = tree + it.iterLast = Int64(-1) + it.data = data + return &it +} + +func (it *LlrbIterator) Reset() { + it.iterLast = Int64(-1) +} + +func (it *LlrbIterator) Clone() graph.Iterator { + var new_it = NewLlrbIterator(it.tree, it.data) + new_it.CopyTagsFrom(it) + return new_it +} + +func (it *LlrbIterator) Close() {} + +func (it *LlrbIterator) Next() (graph.TSVal, bool) { + graph.NextLogIn(it) + if it.tree.Max() == nil || it.Last == int64(it.tree.Max().(Int64)) { + return graph.NextLogOut(it, nil, false) + } + it.iterLast = IterateOne(it.tree, it.iterLast) + it.Last = int64(it.iterLast) + return graph.NextLogOut(it, it.Last, true) +} + +func (it *LlrbIterator) Size() (int64, bool) { + return int64(it.tree.Len()), true +} + +func (it *LlrbIterator) Check(v graph.TSVal) bool { + graph.CheckLogIn(it, v) + if it.tree.Has(Int64(v.(int64))) { + it.Last = v + return graph.CheckLogOut(it, v, true) + } + return graph.CheckLogOut(it, v, false) +} + +func (it *LlrbIterator) DebugString(indent int) string { + size, _ := it.Size() + return fmt.Sprintf("%s(%s tags:%s size:%d %s)", strings.Repeat(" ", indent), it.Type(), it.Tags(), size, it.data) +} + +func (it *LlrbIterator) Type() string { + return "llrb" +} +func (it *LlrbIterator) Sorted() bool { + return 
true +} +func (it *LlrbIterator) Optimize() (graph.Iterator, bool) { + return it, false +} + +func (it *LlrbIterator) GetStats() *graph.IteratorStats { + return &graph.IteratorStats{ + CheckCost: int64(math.Log(float64(it.tree.Len()))) + 1, + NextCost: 1, + Size: int64(it.tree.Len()), + } +} diff --git a/graph/memstore/memstore-all-iterator.go b/graph/memstore/memstore-all-iterator.go new file mode 100644 index 0000000..99cf734 --- /dev/null +++ b/graph/memstore/memstore-all-iterator.go @@ -0,0 +1,45 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package memstore + +import ( + "github.com/google/cayley/graph" +) + +type MemstoreAllIterator struct { + graph.Int64AllIterator + ts *MemTripleStore +} + +func NewMemstoreAllIterator(ts *MemTripleStore) *MemstoreAllIterator { + var out MemstoreAllIterator + out.Int64AllIterator = *graph.NewInt64AllIterator(1, ts.idCounter-1) + out.ts = ts + return &out +} + +func (memall *MemstoreAllIterator) Next() (graph.TSVal, bool) { + next, out := memall.Int64AllIterator.Next() + if !out { + return next, out + } + i64 := next.(int64) + _, ok := memall.ts.revIdMap[i64] + if !ok { + return memall.Next() + } + memall.Last = next + return next, out +} diff --git a/graph/memstore/memtriplestore-iterator-optimize.go b/graph/memstore/memtriplestore-iterator-optimize.go new file mode 100644 index 0000000..7c895fc --- /dev/null +++ b/graph/memstore/memtriplestore-iterator-optimize.go @@ -0,0 +1,53 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package memstore + +import ( + "github.com/google/cayley/graph" +) + +func (ts *MemTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + switch it.Type() { + case "linksto": + return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) + + } + return it, false +} + +func (ts *MemTripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { + l := it.GetSubIterators() + if l.Len() != 1 { + return it, false + } + primaryIt := l.Front().Value.(graph.Iterator) + if primaryIt.Type() == "fixed" { + size, _ := primaryIt.Size() + if size == 1 { + val, ok := primaryIt.Next() + if !ok { + panic("Sizes lie") + } + newIt := ts.GetTripleIterator(it.Direction(), val) + newIt.CopyTagsFrom(it) + for _, tag := range primaryIt.Tags() { + newIt.AddFixedTag(tag, val) + } + return newIt, true + } + } + it.Close() + return it, false +} diff --git a/graph/memstore/memtriplestore.go b/graph/memstore/memtriplestore.go new file mode 100644 index 0000000..7c7882e --- /dev/null +++ b/graph/memstore/memtriplestore.go @@ -0,0 +1,268 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package memstore + +import ( + "fmt" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" + + "github.com/petar/GoLLRB/llrb" +) + +type TripleDirectionIndex struct { + subject map[int64]*llrb.LLRB + predicate map[int64]*llrb.LLRB + object map[int64]*llrb.LLRB + provenance map[int64]*llrb.LLRB +} + +func NewTripleDirectionIndex() *TripleDirectionIndex { + var tdi TripleDirectionIndex + tdi.subject = make(map[int64]*llrb.LLRB) + tdi.predicate = make(map[int64]*llrb.LLRB) + tdi.object = make(map[int64]*llrb.LLRB) + tdi.provenance = make(map[int64]*llrb.LLRB) + return &tdi +} + +func (tdi *TripleDirectionIndex) GetForDir(s string) map[int64]*llrb.LLRB { + if s == "s" { + return tdi.subject + } else if s == "o" { + return tdi.object + } else if s == "p" { + return tdi.predicate + } else if s == "c" { + return tdi.provenance + } + panic("Bad direction") +} + +func (tdi *TripleDirectionIndex) GetOrCreate(dir string, id int64) *llrb.LLRB { + directionIndex := tdi.GetForDir(dir) + if _, ok := directionIndex[id]; !ok { + directionIndex[id] = llrb.New() + } + return directionIndex[id] +} + +func (tdi *TripleDirectionIndex) Get(dir string, id int64) (*llrb.LLRB, bool) { + directionIndex := tdi.GetForDir(dir) + tree, exists := directionIndex[id] + return tree, exists +} + +type MemTripleStore struct { + idCounter int64 + tripleIdCounter int64 + idMap map[string]int64 + revIdMap map[int64]string + triples []graph.Triple + size int64 + index TripleDirectionIndex + // vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree +} + +func NewMemTripleStore() *MemTripleStore { + var ts MemTripleStore + ts.idMap = make(map[string]int64) + ts.revIdMap = make(map[int64]string) + ts.triples = make([]graph.Triple, 1, 200) + + // Sentinel null triple so triple indices start at 1 + ts.triples[0] = graph.Triple{} + ts.size = 1 + ts.index = *NewTripleDirectionIndex() + ts.idCounter = 1 + ts.tripleIdCounter = 1 + return &ts +} + +func (ts *MemTripleStore) 
AddTripleSet(triples []*graph.Triple) { + for _, t := range triples { + ts.AddTriple(t) + } +} + +func (ts *MemTripleStore) tripleExists(t *graph.Triple) (bool, int64) { + smallest := -1 + var smallest_tree *llrb.LLRB + for _, dir := range graph.TripleDirections { + sid := t.Get(dir) + if dir == "c" && sid == "" { + continue + } + id, ok := ts.idMap[sid] + // If we've never heard about a node, it most not exist + if !ok { + return false, 0 + } + index, exists := ts.index.Get(dir, id) + if !exists { + // If it's never been indexed in this direction, it can't exist. + return false, 0 + } + if smallest == -1 || index.Len() < smallest { + smallest = index.Len() + smallest_tree = index + } + } + it := NewLlrbIterator(smallest_tree, "") + + for { + val, ok := it.Next() + if !ok { + break + } + if t.Equals(&ts.triples[val.(int64)]) { + return true, val.(int64) + } + } + return false, 0 +} + +func (ts *MemTripleStore) AddTriple(t *graph.Triple) { + if exists, _ := ts.tripleExists(t); exists { + return + } + var tripleID int64 + ts.triples = append(ts.triples, *t) + tripleID = ts.tripleIdCounter + ts.size++ + ts.tripleIdCounter++ + + for _, dir := range graph.TripleDirections { + sid := t.Get(dir) + if dir == "c" && sid == "" { + continue + } + if _, ok := ts.idMap[sid]; !ok { + ts.idMap[sid] = ts.idCounter + ts.revIdMap[ts.idCounter] = sid + ts.idCounter++ + } + } + + for _, dir := range graph.TripleDirections { + if dir == "c" && t.Get(dir) == "" { + continue + } + id := ts.idMap[t.Get(dir)] + tree := ts.index.GetOrCreate(dir, id) + tree.ReplaceOrInsert(Int64(tripleID)) + } + + // TODO(barakmich): Add VIP indexing +} + +func (ts *MemTripleStore) RemoveTriple(t *graph.Triple) { + var tripleID int64 + var exists bool + tripleID = 0 + if exists, tripleID = ts.tripleExists(t); !exists { + return + } + + ts.triples[tripleID] = graph.Triple{} + ts.size-- + + for _, dir := range graph.TripleDirections { + if dir == "c" && t.Get(dir) == "" { + continue + } + id := 
ts.idMap[t.Get(dir)] + tree := ts.index.GetOrCreate(dir, id) + tree.Delete(Int64(tripleID)) + } + + for _, dir := range graph.TripleDirections { + if dir == "c" && t.Get(dir) == "" { + continue + } + id, ok := ts.idMap[t.Get(dir)] + if !ok { + continue + } + stillExists := false + for _, dir := range graph.TripleDirections { + if dir == "c" && t.Get(dir) == "" { + continue + } + nodeTree := ts.index.GetOrCreate(dir, id) + if nodeTree.Len() != 0 { + stillExists = true + break + } + } + if !stillExists { + delete(ts.idMap, t.Get(dir)) + delete(ts.revIdMap, id) + } + } +} + +func (ts *MemTripleStore) GetTriple(index graph.TSVal) *graph.Triple { + return &ts.triples[index.(int64)] +} + +func (ts *MemTripleStore) GetTripleIterator(direction string, value graph.TSVal) graph.Iterator { + index, ok := ts.index.Get(direction, value.(int64)) + data := fmt.Sprintf("dir:%s val:%d", direction, value.(int64)) + if ok { + return NewLlrbIterator(index, data) + } + return &graph.NullIterator{} +} + +func (ts *MemTripleStore) Size() int64 { + return ts.size - 1 // Don't count the sentinel +} + +func (ts *MemTripleStore) DebugPrint() { + for i, t := range ts.triples { + if i == 0 { + continue + } + glog.V(2).Infoln("%d: %s", i, t.ToString()) + } +} + +func (ts *MemTripleStore) GetIdFor(name string) graph.TSVal { + return ts.idMap[name] +} + +func (ts *MemTripleStore) GetNameFor(id graph.TSVal) string { + return ts.revIdMap[id.(int64)] +} + +func (ts *MemTripleStore) GetTriplesAllIterator() graph.Iterator { + return graph.NewInt64AllIterator(0, ts.Size()) +} + +func (ts *MemTripleStore) MakeFixed() *graph.FixedIterator { + return graph.NewFixedIteratorWithCompare(graph.BasicEquality) +} + +func (ts *MemTripleStore) GetTripleDirection(val graph.TSVal, direction string) graph.TSVal { + name := ts.GetTriple(val).Get(direction) + return ts.GetIdFor(name) +} + +func (ts *MemTripleStore) GetNodesAllIterator() graph.Iterator { + return NewMemstoreAllIterator(ts) +} +func (ts *MemTripleStore) 
Close() {} diff --git a/graph/memstore/memtriplestore_test.go b/graph/memstore/memtriplestore_test.go new file mode 100644 index 0000000..71d7016 --- /dev/null +++ b/graph/memstore/memtriplestore_test.go @@ -0,0 +1,138 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package memstore + +import ( + "sort" + "testing" + + . "github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph" +) + +func TestMemstore(t *testing.T) { + Convey("With a simple memstore", t, func() { + ts := MakeTestingMemstore() + Convey("It should have a reasonable size", func() { + So(ts.Size(), ShouldEqual, 11) + }) + Convey("It should have an Id Space that makes sense", func() { + v := ts.GetIdFor("C") + So(v.(int64), ShouldEqual, 4) + }) + }) +} + +func TestIteratorsAndNextResultOrderA(t *testing.T) { + ts := MakeTestingMemstore() + fixed := ts.MakeFixed() + fixed.AddValue(ts.GetIdFor("C")) + all := ts.GetNodesAllIterator() + lto := graph.NewLinksToIterator(ts, all, "o") + innerAnd := graph.NewAndIterator() + + fixed2 := ts.MakeFixed() + fixed2.AddValue(ts.GetIdFor("follows")) + lto2 := graph.NewLinksToIterator(ts, fixed2, "p") + innerAnd.AddSubIterator(lto2) + innerAnd.AddSubIterator(lto) + hasa := graph.NewHasaIterator(ts, innerAnd, "s") + outerAnd := graph.NewAndIterator() + outerAnd.AddSubIterator(fixed) + outerAnd.AddSubIterator(hasa) + val, ok := outerAnd.Next() + if !ok { + t.Error("Expected one 
matching subtree") + } + if ts.GetNameFor(val) != "C" { + t.Errorf("Matching subtree should be %s, got %s", "barak", ts.GetNameFor(val)) + } + expected := make([]string, 2) + expected[0] = "B" + expected[1] = "D" + actualOut := make([]string, 2) + actualOut[0] = ts.GetNameFor(all.LastResult()) + nresultOk := outerAnd.NextResult() + if !nresultOk { + t.Error("Expected two results got one") + } + actualOut[1] = ts.GetNameFor(all.LastResult()) + nresultOk = outerAnd.NextResult() + if nresultOk { + t.Error("Expected two results got three") + } + CompareStringSlices(t, expected, actualOut) + val, ok = outerAnd.Next() + if ok { + t.Error("More than one possible top level output?") + } +} + +func CompareStringSlices(t *testing.T, expected []string, actual []string) { + if len(expected) != len(actual) { + t.Error("String slices are not the same length") + } + sort.Strings(expected) + sort.Strings(actual) + for i := 0; i < len(expected); i++ { + if expected[i] != actual[i] { + t.Errorf("At index %d, expected \"%s\" and got \"%s\"", i, expected[i], actual[i]) + } + } +} + +func TestLinksToOptimization(t *testing.T) { + ts := MakeTestingMemstore() + fixed := ts.MakeFixed() + fixed.AddValue(ts.GetIdFor("cool")) + lto := graph.NewLinksToIterator(ts, fixed, "o") + lto.AddTag("foo") + newIt, changed := lto.Optimize() + if !changed { + t.Error("Iterator didn't change") + } + if newIt.Type() != "llrb" { + t.Fatal("Didn't swap out to LLRB") + } + v := newIt.(*LlrbIterator) + v_clone := v.Clone() + if v_clone.DebugString(0) != v.DebugString(0) { + t.Fatal("Wrong iterator. 
Got ", v_clone.DebugString(0)) + } + if len(v_clone.Tags()) < 1 || v_clone.Tags()[0] != "foo" { + t.Fatal("Tag on LinksTo did not persist") + } +} + +func TestRemoveTriple(t *testing.T) { + ts := MakeTestingMemstore() + ts.RemoveTriple(graph.MakeTriple("E", "follows", "F", "")) + fixed := ts.MakeFixed() + fixed.AddValue(ts.GetIdFor("E")) + lto := graph.NewLinksToIterator(ts, fixed, "s") + fixed2 := ts.MakeFixed() + fixed2.AddValue(ts.GetIdFor("follows")) + lto2 := graph.NewLinksToIterator(ts, fixed2, "p") + innerAnd := graph.NewAndIterator() + innerAnd.AddSubIterator(lto2) + innerAnd.AddSubIterator(lto) + hasa := graph.NewHasaIterator(ts, innerAnd, "o") + newIt, _ := hasa.Optimize() + _, ok := newIt.Next() + if ok { + t.Error("E should not have any followers.") + } +} diff --git a/graph/memstore/testing_memstore.go b/graph/memstore/testing_memstore.go new file mode 100644 index 0000000..b33c95f --- /dev/null +++ b/graph/memstore/testing_memstore.go @@ -0,0 +1,45 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package memstore + +import "github.com/google/cayley/graph" + +// +---+ +---+ +// | A |------- ->| F |<-- +// +---+ \------>+---+-/ +---+ \--+---+ +// ------>|#B#| | | E | +// +---+-------/ >+---+ | +---+ +// | C | / v +// +---+ -/ +---+ +// ---- +---+/ |#G#| +// \-->|#D#|------------->+---+ +// +---+ +// + +func MakeTestingMemstore() *MemTripleStore { + ts := NewMemTripleStore() + ts.AddTriple(graph.MakeTriple("A", "follows", "B", "")) + ts.AddTriple(graph.MakeTriple("C", "follows", "B", "")) + ts.AddTriple(graph.MakeTriple("C", "follows", "D", "")) + ts.AddTriple(graph.MakeTriple("D", "follows", "B", "")) + ts.AddTriple(graph.MakeTriple("B", "follows", "F", "")) + ts.AddTriple(graph.MakeTriple("F", "follows", "G", "")) + ts.AddTriple(graph.MakeTriple("D", "follows", "G", "")) + ts.AddTriple(graph.MakeTriple("E", "follows", "F", "")) + ts.AddTriple(graph.MakeTriple("B", "status", "cool", "status_graph")) + ts.AddTriple(graph.MakeTriple("D", "status", "cool", "status_graph")) + ts.AddTriple(graph.MakeTriple("G", "status", "cool", "status_graph")) + return ts +} diff --git a/graph/mock_ts.go b/graph/mock_ts.go new file mode 100644 index 0000000..fcd9244 --- /dev/null +++ b/graph/mock_ts.go @@ -0,0 +1,58 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// A quickly mocked version of the TripleStore interface, for use in tests. 
+// Can better used Mock.Called but will fill in as needed. + +import ( + "github.com/stretchrcom/testify/mock" +) + +type TestTripleStore struct { + mock.Mock +} + +func (ts *TestTripleStore) GetIdFor(s string) TSVal { + args := ts.Mock.Called(s) + return args.Get(0) +} +func (ts *TestTripleStore) AddTriple(*Triple) {} +func (ts *TestTripleStore) AddTripleSet([]*Triple) {} +func (ts *TestTripleStore) GetTriple(TSVal) *Triple { return &Triple{} } +func (ts *TestTripleStore) GetTripleIterator(s string, i TSVal) Iterator { + args := ts.Mock.Called(s, i) + return args.Get(0).(Iterator) +} +func (ts *TestTripleStore) GetNodesAllIterator() Iterator { return &NullIterator{} } +func (ts *TestTripleStore) GetTriplesAllIterator() Iterator { return &NullIterator{} } +func (ts *TestTripleStore) GetIteratorByString(string, string, string) Iterator { + return &NullIterator{} +} +func (ts *TestTripleStore) GetNameFor(v TSVal) string { + args := ts.Mock.Called(v) + return args.Get(0).(string) +} +func (ts *TestTripleStore) Size() int64 { return 0 } +func (ts *TestTripleStore) DebugPrint() {} +func (ts *TestTripleStore) OptimizeIterator(it Iterator) (Iterator, bool) { + return &NullIterator{}, false +} +func (ts *TestTripleStore) MakeFixed() *FixedIterator { + return NewFixedIteratorWithCompare(BasicEquality) +} +func (ts *TestTripleStore) Close() {} +func (ts *TestTripleStore) GetTripleDirection(TSVal, string) TSVal { return 0 } +func (ts *TestTripleStore) RemoveTriple(t *Triple) {} diff --git a/graph/mongo/lru.go b/graph/mongo/lru.go new file mode 100644 index 0000000..90220e4 --- /dev/null +++ b/graph/mongo/lru.go @@ -0,0 +1,62 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// IDLru is a small least-recently-used cache from string keys to string
// values. cache maps a key to its element in priority; priority is ordered
// from most recently used (front) to least recently used (back).
type IDLru struct {
	cache    map[string]*list.Element
	priority *list.List
	maxSize  int
}

// KV is the key/value pair stored in each element of the priority list. The
// key is kept alongside the value so that eviction can remove the matching
// map entry.
type KV struct {
	key   string
	value string
}

// NewIDLru returns an empty cache that holds at most size entries. A size of
// zero yields a cache that stores nothing; a negative size leaves the cache
// unbounded.
func NewIDLru(size int) *IDLru {
	var lru IDLru
	lru.maxSize = size
	lru.priority = list.New()
	lru.cache = make(map[string]*list.Element)
	return &lru
}

// Put stores value under key and marks the entry as most recently used.
// If key is already cached its value is replaced. When the cache is full the
// least recently used entry is evicted first.
func (lru *IDLru) Put(key string, value string) {
	if lru.maxSize == 0 {
		// Zero capacity: nothing can be stored, and there is nothing to evict.
		return
	}
	if element, ok := lru.cache[key]; ok {
		// Refresh in place so a later Get cannot observe a stale value.
		element.Value = KV{key: key, value: value}
		lru.priority.MoveToFront(element)
		return
	}
	if lru.maxSize > 0 && len(lru.cache) >= lru.maxSize {
		lru.removeOldest()
	}
	lru.cache[key] = lru.priority.PushFront(KV{key: key, value: value})
}

// Get returns the value cached under key and true, marking the entry as most
// recently used; it returns "" and false when key is not cached.
func (lru *IDLru) Get(key string) (string, bool) {
	if element, ok := lru.cache[key]; ok {
		lru.priority.MoveToFront(element)
		return element.Value.(KV).value, true
	}
	return "", false
}

// removeOldest evicts the least recently used entry. It does nothing on an
// empty cache.
func (lru *IDLru) removeOldest() {
	oldest := lru.priority.Back()
	if oldest == nil {
		return
	}
	evicted := lru.priority.Remove(oldest).(KV)
	delete(lru.cache, evicted.key)
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongo + +import ( + "fmt" + "strings" + + "github.com/barakmich/glog" + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + + "github.com/google/cayley/graph" +) + +type MongoIterator struct { + graph.BaseIterator + ts *MongoTripleStore + dir string + iter *mgo.Iter + hash string + name string + size int64 + isAll bool + constraint bson.M + collection string +} + +func NewMongoIterator(ts *MongoTripleStore, collection string, dir string, val graph.TSVal) *MongoIterator { + var m MongoIterator + graph.BaseIteratorInit(&m.BaseIterator) + + m.name = ts.GetNameFor(val) + m.collection = collection + switch dir { + + case "s": + m.constraint = bson.M{"Sub": m.name} + case "p": + m.constraint = bson.M{"Pred": m.name} + case "o": + m.constraint = bson.M{"Obj": m.name} + case "c": + m.constraint = bson.M{"Provenance": m.name} + } + + m.ts = ts + m.dir = dir + m.iter = ts.db.C(collection).Find(m.constraint).Iter() + size, err := ts.db.C(collection).Find(m.constraint).Count() + if err != nil { + glog.Errorln("Trouble getting size for iterator! ", err) + return nil + } + m.size = int64(size) + m.hash = val.(string) + m.isAll = false + return &m +} + +func NewMongoAllIterator(ts *MongoTripleStore, collection string) *MongoIterator { + var m MongoIterator + m.ts = ts + m.dir = "all" + m.constraint = nil + m.collection = collection + m.iter = ts.db.C(collection).Find(nil).Iter() + size, err := ts.db.C(collection).Count() + if err != nil { + glog.Errorln("Trouble getting size for iterator! 
", err) + return nil + } + m.size = int64(size) + m.hash = "" + m.isAll = true + return &m +} + +func (m *MongoIterator) Reset() { + m.iter.Close() + m.iter = m.ts.db.C(m.collection).Find(m.constraint).Iter() + +} + +func (m *MongoIterator) Close() { + m.iter.Close() +} + +func (m *MongoIterator) Clone() graph.Iterator { + var newM graph.Iterator + if m.isAll { + newM = NewMongoAllIterator(m.ts, m.collection) + } else { + newM = NewMongoIterator(m.ts, m.collection, m.dir, m.hash) + } + newM.CopyTagsFrom(m) + return newM +} + +func (m *MongoIterator) Next() (graph.TSVal, bool) { + var result struct { + Id string "_id" + //Sub string "Sub" + //Pred string "Pred" + //Obj string "Obj" + } + found := m.iter.Next(&result) + if !found { + err := m.iter.Err() + if err != nil { + glog.Errorln("Error Nexting MongoIterator: ", err) + } + return nil, false + } + m.Last = result.Id + return result.Id, true +} + +func (m *MongoIterator) Check(v graph.TSVal) bool { + graph.CheckLogIn(m, v) + if m.isAll { + m.Last = v + return graph.CheckLogOut(m, v, true) + } + var offset int + switch m.dir { + case "s": + offset = 0 + case "p": + offset = (m.ts.hasher.Size() * 2) + case "o": + offset = (m.ts.hasher.Size() * 2) * 2 + case "c": + offset = (m.ts.hasher.Size() * 2) * 3 + } + val := v.(string)[offset : m.ts.hasher.Size()*2+offset] + if val == m.hash { + m.Last = v + return graph.CheckLogOut(m, v, true) + } + return graph.CheckLogOut(m, v, false) +} + +func (m *MongoIterator) Size() (int64, bool) { + return m.size, true +} + +func (m *MongoIterator) Type() string { + if m.isAll { + return "all" + } + return "mongo" +} +func (m *MongoIterator) Sorted() bool { return true } +func (m *MongoIterator) Optimize() (graph.Iterator, bool) { return m, false } + +func (m *MongoIterator) DebugString(indent int) string { + size, _ := m.Size() + return fmt.Sprintf("%s(%s size:%d %s %s)", strings.Repeat(" ", indent), m.Type(), size, m.hash, m.name) +} + +func (m *MongoIterator) GetStats() 
*graph.IteratorStats { + size, _ := m.Size() + return &graph.IteratorStats{ + CheckCost: 1, + NextCost: 5, + Size: size, + } +} diff --git a/graph/mongo/mongo-triplestore-iterator-optimize.go b/graph/mongo/mongo-triplestore-iterator-optimize.go new file mode 100644 index 0000000..d10bc22 --- /dev/null +++ b/graph/mongo/mongo-triplestore-iterator-optimize.go @@ -0,0 +1,53 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mongo + +import ( + "github.com/google/cayley/graph" +) + +func (ts *MongoTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { + switch it.Type() { + case "linksto": + return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) + + } + return it, false +} + +func (ts *MongoTripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { + l := it.GetSubIterators() + if l.Len() != 1 { + return it, false + } + primaryIt := l.Front().Value.(graph.Iterator) + if primaryIt.Type() == "fixed" { + size, _ := primaryIt.Size() + if size == 1 { + val, ok := primaryIt.Next() + if !ok { + panic("Sizes lie") + } + newIt := ts.GetTripleIterator(it.Direction(), val) + newIt.CopyTagsFrom(it) + for _, tag := range primaryIt.Tags() { + newIt.AddFixedTag(tag, val) + } + it.Close() + return newIt, true + } + } + return it, false +} diff --git a/graph/mongo/mongo-triplestore.go b/graph/mongo/mongo-triplestore.go new file mode 100644 index 0000000..917ab4d --- /dev/null +++ b/graph/mongo/mongo-triplestore.go @@ -0,0 +1,329 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mongo + +import ( + "crypto/sha1" + "encoding/hex" + "hash" + "log" + + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + + "github.com/barakmich/glog" + "github.com/google/cayley/graph" +) + +const DefaultDBName = "cayley" + +type MongoTripleStore struct { + session *mgo.Session + db *mgo.Database + hasher hash.Hash + idCache *IDLru +} + +func CreateNewMongoGraph(addr string, options graph.OptionsDict) bool { + conn, err := mgo.Dial(addr) + if err != nil { + glog.Fatal("Error connecting: ", err) + return false + } + conn.SetSafe(&mgo.Safe{}) + dbName := DefaultDBName + if val, ok := options.GetStringKey("database_name"); ok { + dbName = val + } + db := conn.DB(dbName) + indexOpts := mgo.Index{ + Key: []string{"Sub"}, + Unique: false, + DropDups: false, + Background: true, + Sparse: true, + } + db.C("triples").EnsureIndex(indexOpts) + indexOpts.Key = []string{"Pred"} + db.C("triples").EnsureIndex(indexOpts) + indexOpts.Key = []string{"Obj"} + db.C("triples").EnsureIndex(indexOpts) + indexOpts.Key = []string{"Provenance"} + db.C("triples").EnsureIndex(indexOpts) + return true +} + +func NewMongoTripleStore(addr string, options graph.OptionsDict) *MongoTripleStore { + var ts MongoTripleStore + conn, err := mgo.Dial(addr) + if err != nil { + glog.Fatal("Error connecting: ", err) + } + conn.SetSafe(&mgo.Safe{}) + dbName := DefaultDBName + if val, ok := options.GetStringKey("database_name"); ok { + dbName = val + } + ts.db = conn.DB(dbName) + ts.session = conn + ts.hasher = sha1.New() + ts.idCache = NewIDLru(1 << 16) + return &ts +} + +func (ts *MongoTripleStore) getIdForTriple(t *graph.Triple) string { + id := ts.ConvertStringToByteHash(t.Sub) + id += ts.ConvertStringToByteHash(t.Pred) + id += ts.ConvertStringToByteHash(t.Obj) + id += ts.ConvertStringToByteHash(t.Provenance) + return id +} + +func (ts *MongoTripleStore) ConvertStringToByteHash(s string) string { + ts.hasher.Reset() + key := make([]byte, 0, ts.hasher.Size()) + ts.hasher.Write([]byte(s)) + key = 
ts.hasher.Sum(key) + return hex.EncodeToString(key) +} + +type MongoNode struct { + Id string "_id" + Name string "Name" + Size int "Size" +} + +func (ts *MongoTripleStore) updateNodeBy(node_name string, inc int) { + var size MongoNode + node := ts.GetIdFor(node_name) + err := ts.db.C("nodes").FindId(node).One(&size) + if err != nil { + if err.Error() == "not found" { + // Not found. Okay. + size.Id = node.(string) + size.Name = node_name + size.Size = inc + } else { + glog.Error("Error:", err) + return + } + } else { + size.Id = node.(string) + size.Name = node_name + size.Size += inc + } + + // Removing something... + if inc < 0 { + if size.Size <= 0 { + err := ts.db.C("nodes").RemoveId(node) + if err != nil { + glog.Error("Error: ", err, " while removing node ", node_name) + return + } + } + } + + _, err2 := ts.db.C("nodes").UpsertId(node, size) + if err2 != nil { + glog.Error("Error: ", err) + } +} + +func (ts *MongoTripleStore) writeTriple(t *graph.Triple) bool { + tripledoc := bson.M{"_id": ts.getIdForTriple(t), "Sub": t.Sub, "Pred": t.Pred, "Obj": t.Obj, "Provenance": t.Provenance} + err := ts.db.C("triples").Insert(tripledoc) + if err != nil { + // Among the reasons I hate MongoDB. "Errors don't happen! Right guys?" 
+ if err.(*mgo.LastError).Code == 11000 { + return false + } + glog.Error("Error: ", err) + return false + } + return true +} + +func (ts *MongoTripleStore) AddTriple(t *graph.Triple) { + _ = ts.writeTriple(t) + ts.updateNodeBy(t.Sub, 1) + ts.updateNodeBy(t.Pred, 1) + ts.updateNodeBy(t.Obj, 1) + if t.Provenance != "" { + ts.updateNodeBy(t.Provenance, 1) + } +} + +func (ts *MongoTripleStore) AddTripleSet(in []*graph.Triple) { + ts.session.SetSafe(nil) + idMap := make(map[string]int) + for _, t := range in { + wrote := ts.writeTriple(t) + if wrote { + idMap[t.Sub]++ + idMap[t.Obj]++ + idMap[t.Pred]++ + if t.Provenance != "" { + idMap[t.Provenance]++ + } + } + } + for k, v := range idMap { + ts.updateNodeBy(k, v) + } + ts.session.SetSafe(&mgo.Safe{}) +} + +func (ts *MongoTripleStore) RemoveTriple(t *graph.Triple) { + err := ts.db.C("triples").RemoveId(ts.getIdForTriple(t)) + if err == mgo.ErrNotFound { + return + } else if err != nil { + log.Println("Error: ", err, " while removing triple ", t) + return + } + ts.updateNodeBy(t.Sub, -1) + ts.updateNodeBy(t.Pred, -1) + ts.updateNodeBy(t.Obj, -1) + if t.Provenance != "" { + ts.updateNodeBy(t.Provenance, -1) + } +} + +func (ts *MongoTripleStore) GetTriple(val graph.TSVal) *graph.Triple { + var bsonDoc bson.M + err := ts.db.C("triples").FindId(val.(string)).One(&bsonDoc) + if err != nil { + log.Println("Error: Couldn't retrieve triple", val.(string), err) + } + return graph.MakeTriple( + bsonDoc["Sub"].(string), + bsonDoc["Pred"].(string), + bsonDoc["Obj"].(string), + bsonDoc["Provenance"].(string)) +} + +func (ts *MongoTripleStore) GetTripleIterator(dir string, val graph.TSVal) graph.Iterator { + return NewMongoIterator(ts, "triples", dir, val) +} + +func (ts *MongoTripleStore) GetNodesAllIterator() graph.Iterator { + return NewMongoAllIterator(ts, "nodes") +} + +func (ts *MongoTripleStore) GetTriplesAllIterator() graph.Iterator { + return NewMongoAllIterator(ts, "triples") +} + +func (ts *MongoTripleStore) GetIdFor(s 
string) graph.TSVal { + return ts.ConvertStringToByteHash(s) +} + +func (ts *MongoTripleStore) GetNameFor(v graph.TSVal) string { + val, ok := ts.idCache.Get(v.(string)) + if ok { + return val + } + var node MongoNode + err := ts.db.C("nodes").FindId(v.(string)).One(&node) + if err != nil { + log.Println("Error: Couldn't retrieve node", v.(string), err) + } + ts.idCache.Put(v.(string), node.Name) + return node.Name +} + +func (ts *MongoTripleStore) Size() int64 { + count, err := ts.db.C("triples").Count() + if err != nil { + glog.Error("Error: ", err) + return 0 + } + return int64(count) +} + +func compareStrings(a, b graph.TSVal) bool { + return a.(string) == b.(string) +} + +func (ts *MongoTripleStore) MakeFixed() *graph.FixedIterator { + return graph.NewFixedIteratorWithCompare(compareStrings) +} + +func (ts *MongoTripleStore) Close() { + ts.db.Session.Close() +} + +func (ts *MongoTripleStore) GetTripleDirection(in graph.TSVal, dir string) graph.TSVal { + // Maybe do the trick here + var offset int + switch dir { + case "s": + offset = 0 + case "p": + offset = (ts.hasher.Size() * 2) + case "o": + offset = (ts.hasher.Size() * 2) * 2 + case "c": + offset = (ts.hasher.Size() * 2) * 3 + } + val := in.(string)[offset : ts.hasher.Size()*2+offset] + return val +} + +func (ts *MongoTripleStore) BulkLoad(t_chan chan *graph.Triple) { + ts.session.SetSafe(nil) + for triple := range t_chan { + ts.writeTriple(triple) + } + outputTo := bson.M{"replace": "nodes", "sharded": true} + glog.Infoln("Mapreducing") + job := mgo.MapReduce{ + Map: `function() { + var len = this["_id"].length + var s_key = this["_id"].slice(0, len / 4) + var p_key = this["_id"].slice(len / 4, 2 * len / 4) + var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) + var c_key = this["_id"].slice(3 * len / 4) + emit(s_key, {"_id": s_key, "Name" : this.Sub, "Size" : 1}) + emit(p_key, {"_id": p_key, "Name" : this.Pred, "Size" : 1}) + emit(o_key, {"_id": o_key, "Name" : this.Obj, "Size" : 1}) + if 
(this.Provenance != "") { + emit(c_key, {"_id": c_key, "Name" : this.Provenance, "Size" : 1}) + } + } + `, + Reduce: ` + function(key, value_list) { + out = {"_id": key, "Name": value_list[0].Name} + count = 0 + for (var i = 0; i < value_list.length; i++) { + count = count + value_list[i].Size + + } + out["Size"] = count + return out + } + `, + Out: outputTo, + } + ts.db.C("triples").Find(nil).MapReduce(&job, nil) + glog.Infoln("Fixing") + ts.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) { + db.nodes.update({"_id": result._id}, result.value) + }) }`}, {"args", bson.D{}}}, nil) + + ts.session.SetSafe(&mgo.Safe{}) +} diff --git a/graph/optional-iterator.go b/graph/optional-iterator.go new file mode 100644 index 0000000..8050ba8 --- /dev/null +++ b/graph/optional-iterator.go @@ -0,0 +1,134 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// "Optional" is kind of odd. It's not an iterator in the strictest sense, but +// it's easier to implement as an iterator. +// +// Consider what it means. It means that we have a subconstraint which we do +// not want to constrain the query -- we just want it to return the matching +// subgraph if one matches at all. By analogy to regular expressions, it is the +// '?' operator. 
+// +// If it were a proper iterator of its own (and indeed, a reasonable refactor +// of this iterator would be to make it such) it would contain an all iterator +// -- all things in the graph. It matches everything (as does the regex "(a)?") + +import ( + "fmt" + "github.com/barakmich/glog" + "strings" +) + +// An optional iterator has the subconstraint iterator we wish to be optional +// and whether the last check we received was true or false. +type OptionalIterator struct { + BaseIterator + subIt Iterator + lastCheck bool +} + +// Creates a new optional iterator. +func NewOptionalIterator(it Iterator) *OptionalIterator { + var o OptionalIterator + BaseIteratorInit(&o.BaseIterator) + o.nextable = false + o.subIt = it + return &o +} + +func (o *OptionalIterator) Reset() { + o.subIt.Reset() + o.lastCheck = false +} + +func (o *OptionalIterator) Close() { + o.subIt.Close() +} + +func (o *OptionalIterator) Clone() Iterator { + out := NewOptionalIterator(o.subIt.Clone()) + out.CopyTagsFrom(o) + return out +} + +// Nexting the iterator is unsupported -- error and return an empty set. +// (As above, a reasonable alternative would be to Next() an all iterator) +func (o *OptionalIterator) Next() (TSVal, bool) { + glog.Errorln("Nexting an un-nextable iterator") + return nil, false +} + +// An optional iterator only has a next result if, (a) last time we checked +// we had any results whatsoever, and (b) there was another subresult in our +// optional subbranch. +func (o *OptionalIterator) NextResult() bool { + if o.lastCheck { + return o.subIt.NextResult() + } + return false +} + +// Check() is the real hack of this iterator. It always returns true, regardless +// of whether the subiterator matched. But we keep track of whether the subiterator +// matched for results purposes. 
+func (o *OptionalIterator) Check(val TSVal) bool { + checked := o.subIt.Check(val) + o.lastCheck = checked + o.Last = val + return true +} + +// If we failed the check, then the subiterator should not contribute to the result +// set. Otherwise, go ahead and tag it. +func (o *OptionalIterator) TagResults(out *map[string]TSVal) { + if o.lastCheck == false { + return + } + o.subIt.TagResults(out) +} + +// Registers the optional iterator. +func (o *OptionalIterator) Type() string { return "optional" } + +// Prints the optional and it's subiterator. +func (o *OptionalIterator) DebugString(indent int) string { + return fmt.Sprintf("%s(%s tags:%s\n%s)", + strings.Repeat(" ", indent), + o.Type(), + o.Tags(), + o.subIt.DebugString(indent+4)) +} + +// There's nothing to optimize for an optional. Optimize the subiterator and +// potentially replace it. +func (o *OptionalIterator) Optimize() (Iterator, bool) { + newSub, changed := o.subIt.Optimize() + if changed { + o.subIt.Close() + o.subIt = newSub + } + return o, false +} + +// We're only as expensive as our subiterator. Except, we can't be nexted. +func (o *OptionalIterator) GetStats() *IteratorStats { + subStats := o.subIt.GetStats() + return &IteratorStats{ + CheckCost: subStats.CheckCost, + NextCost: int64(1 << 62), + Size: subStats.Size, + } +} diff --git a/graph/or-iterator.go b/graph/or-iterator.go new file mode 100644 index 0000000..a0de623 --- /dev/null +++ b/graph/or-iterator.go @@ -0,0 +1,287 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +// Defines the or and short-circuiting or iterator. Or is the union operator for it's subiterators. +// Short-circuiting-or is a little different. It will return values from the first iterator that returns +// values at all, and then stops. +// +// Never reorders the iterators from the order they arrive. It is either the union or the first one. +// May return the same value twice -- once for each branch. + +import ( + "container/list" + "fmt" + "strings" +) + +type OrIterator struct { + BaseIterator + isShortCircuiting bool + internalIterators []Iterator + itCount int + currentIterator int +} + +func NewOrIterator() *OrIterator { + var or OrIterator + BaseIteratorInit(&or.BaseIterator) + or.internalIterators = make([]Iterator, 0, 20) + or.isShortCircuiting = false + or.currentIterator = -1 + return &or +} + +func NewShortCircuitOrIterator() *OrIterator { + var or OrIterator + BaseIteratorInit(&or.BaseIterator) + or.internalIterators = make([]Iterator, 0, 20) + or.isShortCircuiting = true + or.currentIterator = -1 + return &or +} + +// Reset all internal iterators +func (or *OrIterator) Reset() { + for _, it := range or.internalIterators { + it.Reset() + } + or.currentIterator = -1 +} + +func (or *OrIterator) Clone() Iterator { + var newOr *OrIterator + if or.isShortCircuiting { + newOr = NewShortCircuitOrIterator() + } else { + newOr = NewOrIterator() + } + for _, it := range or.internalIterators { + newOr.AddSubIterator(it.Clone()) + } + or.CopyTagsFrom(or) + return newOr +} + +// Returns a list.List of the subiterators, in order. +func (or *OrIterator) GetSubIterators() *list.List { + l := list.New() + for _, it := range or.internalIterators { + l.PushBack(it) + } + return l +} + +// Overrides BaseIterator TagResults, as it needs to add it's own results and +// recurse down it's subiterators. 
+func (or *OrIterator) TagResults(out *map[string]TSVal) { + or.BaseIterator.TagResults(out) + or.internalIterators[or.currentIterator].TagResults(out) +} + +// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. +func (or *OrIterator) GetResultTree() *ResultTree { + tree := NewResultTree(or.LastResult()) + for _, it := range or.internalIterators { + tree.AddSubtree(it.GetResultTree()) + } + return tree +} + +// Prints information about this iterator. +func (or *OrIterator) DebugString(indent int) string { + var total string + for i, it := range or.internalIterators { + total += strings.Repeat(" ", indent+2) + total += fmt.Sprintf("%d:\n%s\n", i, it.DebugString(indent+4)) + } + var tags string + for _, k := range or.Tags() { + tags += fmt.Sprintf("%s;", k) + } + spaces := strings.Repeat(" ", indent+2) + + return fmt.Sprintf("%s(%s\n%stags:%s\n%sits:\n%s)", + strings.Repeat(" ", indent), + or.Type(), + spaces, + tags, + spaces, + total) +} + +// Add a subiterator to this Or iterator. Order matters. +func (or *OrIterator) AddSubIterator(sub Iterator) { + or.internalIterators = append(or.internalIterators, sub) + or.itCount++ +} + +// Returns the Next value from the Or iterator. Because the Or is the +// union of its subiterators, it must produce from all subiterators -- unless +// it's shortcircuiting, in which case, it's the first one that returns anything. 
+func (or *OrIterator) Next() (TSVal, bool) { + NextLogIn(or) + var curr TSVal + var exists bool + firstTime := false + for { + if or.currentIterator == -1 { + or.currentIterator = 0 + firstTime = true + } + curIt := or.internalIterators[or.currentIterator] + curr, exists = curIt.Next() + if !exists { + if or.isShortCircuiting && !firstTime { + return NextLogOut(or, nil, false) + } + or.currentIterator++ + if or.currentIterator == or.itCount { + return NextLogOut(or, nil, false) + } + } else { + or.Last = curr + return NextLogOut(or, curr, true) + } + } + panic("Somehow broke out of Next() loop in OrIterator") +} + +// Checks a value against the iterators, in order. +func (or *OrIterator) checkSubIts(val TSVal) bool { + var subIsGood = false + for i, it := range or.internalIterators { + subIsGood = it.Check(val) + if subIsGood { + or.currentIterator = i + break + } + } + return subIsGood +} + +// Check a value against the entire iterator, in order. +func (or *OrIterator) Check(val TSVal) bool { + CheckLogIn(or, val) + anyGood := or.checkSubIts(val) + if !anyGood { + return CheckLogOut(or, val, false) + } + or.Last = val + return CheckLogOut(or, val, true) +} + +// Returns the approximate size of the Or iterator. Because we're dealing +// with a union, we know that the largest we can be is the sum of all the iterators, +// or in the case of short-circuiting, the longest. +func (or *OrIterator) Size() (int64, bool) { + var val int64 + var b bool + if or.isShortCircuiting { + val = 0 + b = true + for _, it := range or.internalIterators { + newval, newb := it.Size() + if val < newval { + val = newval + } + b = newb && b + } + } else { + val = 0 + b = true + for _, it := range or.internalIterators { + newval, newb := it.Size() + val += newval + b = newb && b + } + } + return val, b +} + +// An Or has no NextResult of its own -- that is, there are no other values +// which satisfy our previous result that are not the result itself. 
Our +// subiterators might, however, so just pass the call recursively. In the case of +// shortcircuiting, only allow new results from the currently checked iterator +func (or *OrIterator) NextResult() bool { + if or.currentIterator != -1 { + return or.internalIterators[or.currentIterator].NextResult() + } + return false +} + +// Perform or-specific cleanup, of which there currently is none. +func (or *OrIterator) cleanUp() {} + +// Close this iterator, and, by extension, close the subiterators. +// Close should be idempotent, and it follows that if it's subiterators +// follow this contract, the And follows the contract. +func (or *OrIterator) Close() { + or.cleanUp() + for _, it := range or.internalIterators { + it.Close() + } +} + +func (or *OrIterator) Optimize() (Iterator, bool) { + oldItList := or.GetSubIterators() + itList := optimizeSubIterators(oldItList) + // Close the replaced iterators (they ought to close themselves, but Close() + // is idempotent, so this just protects against any machinations). + closeIteratorList(oldItList, nil) + newOr := NewOrIterator() + newOr.isShortCircuiting = or.isShortCircuiting + + // Add the subiterators in order. + for e := itList.Front(); e != nil; e = e.Next() { + newOr.AddSubIterator(e.Value.(Iterator)) + } + + // Move the tags hanging on us (like any good replacement). + newOr.CopyTagsFrom(or) + + // And close ourselves but not our subiterators -- some may still be alive in + // the new And (they were unchanged upon calling Optimize() on them, at the + // start). 
+ or.cleanUp() + return newOr, true +} + +func (or *OrIterator) GetStats() *IteratorStats { + CheckCost := int64(0) + NextCost := int64(0) + Size := int64(0) + for _, it := range or.internalIterators { + stats := it.GetStats() + NextCost += stats.NextCost + CheckCost += stats.CheckCost + if or.isShortCircuiting { + if Size < stats.Size { + Size = stats.Size + } + } else { + Size += stats.Size + } + } + return &IteratorStats{ + CheckCost: CheckCost, + NextCost: NextCost, + Size: Size, + } + +} + +// Register this as an "or" iterator. +func (or *OrIterator) Type() string { return "or" } diff --git a/graph/or-iterator_test.go b/graph/or-iterator_test.go new file mode 100644 index 0000000..9450094 --- /dev/null +++ b/graph/or-iterator_test.go @@ -0,0 +1,142 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + . 
"github.com/smartystreets/goconvey/convey" + "testing" +) + +func extractNumbersFromIterator(it Iterator) []int { + var outputNumbers []int + for { + val, ok := it.Next() + if !ok { + break + } + outputNumbers = append(outputNumbers, val.(int)) + } + return outputNumbers +} + +func TestOrIteratorBasics(t *testing.T) { + var orIt *OrIterator + + Convey("Given an Or Iterator of two fixed iterators", t, func() { + orIt = NewOrIterator() + fixed1 := newFixedIterator() + fixed1.AddValue(1) + fixed1.AddValue(2) + fixed1.AddValue(3) + fixed2 := newFixedIterator() + fixed2.AddValue(3) + fixed2.AddValue(9) + fixed2.AddValue(20) + fixed2.AddValue(21) + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + + Convey("It should guess its size.", func() { + v, _ := orIt.Size() + So(v, ShouldEqual, 7) + }) + + Convey("It should extract all the numbers, potentially twice.", func() { + allNumbers := []int{1, 2, 3, 3, 9, 20, 21} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + Convey("It should check that numbers in either iterator exist.", func() { + So(orIt.Check(2), ShouldEqual, true) + So(orIt.Check(3), ShouldEqual, true) + So(orIt.Check(21), ShouldEqual, true) + }) + + Convey("It should check that numbers not in either iterator are false.", func() { + So(orIt.Check(22), ShouldEqual, false) + So(orIt.Check(5), ShouldEqual, false) + So(orIt.Check(0), ShouldEqual, false) + }) + + }) + +} + +func TestShortCircuitingOrBasics(t *testing.T) { + var orIt *OrIterator + + Convey("Given a short-circuiting Or of two fixed iterators", t, func() { + orIt = NewShortCircuitOrIterator() + fixed1 := newFixedIterator() + fixed1.AddValue(1) + fixed1.AddValue(2) + fixed1.AddValue(3) + fixed2 := newFixedIterator() + fixed2.AddValue(3) + fixed2.AddValue(9) + 
fixed2.AddValue(20) + fixed2.AddValue(21) + + Convey("It should guess its size.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + v, _ := orIt.Size() + So(v, ShouldEqual, 4) + }) + + Convey("It should extract the first iterators' numbers.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + allNumbers := []int{1, 2, 3} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + Convey("It should check that numbers in either iterator exist.", func() { + orIt.AddSubIterator(fixed1) + orIt.AddSubIterator(fixed2) + So(orIt.Check(2), ShouldEqual, true) + So(orIt.Check(3), ShouldEqual, true) + So(orIt.Check(21), ShouldEqual, true) + So(orIt.Check(22), ShouldEqual, false) + So(orIt.Check(5), ShouldEqual, false) + So(orIt.Check(0), ShouldEqual, false) + + }) + + Convey("It should check that it pulls the second iterator's numbers if the first is empty.", func() { + orIt.AddSubIterator(newFixedIterator()) + orIt.AddSubIterator(fixed2) + allNumbers := []int{3, 9, 20, 21} + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + orIt.Reset() + So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) + // Optimization works + newOr, _ := orIt.Optimize() + So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) + }) + + }) + +} diff --git a/graph/query-shape.go b/graph/query-shape.go new file mode 100644 index 0000000..dece079 --- /dev/null +++ b/graph/query-shape.go @@ -0,0 +1,189 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+// Node is one vertex of the query-shape graph derived from an iterator tree.
+// The JSON field names are given by the struct tags.
+type Node struct {
+	Id         int      `json:"id"`
+	Tags       []string `json:"tags,omitempty"`
+	Values     []string `json:"values,omitempty"`
+	IsLinkNode bool     `json:"is_link_node"`
+	IsFixed    bool     `json:"is_fixed"`
+}
+
+// Link is one edge of the query-shape graph. Source, Target and LinkNode
+// hold Node ids; Pred is serialized under the JSON key "type".
+type Link struct {
+	Source   int `json:"source"`
+	Target   int `json:"target"`
+	Pred     int `json:"type"`
+	LinkNode int `json:"link_node"`
+}
+
+// queryShape accumulates nodes and links while MakeNode walks an iterator
+// tree. hasaIds and hasaDirs are parallel stacks describing the enclosing
+// hasa iterators.
+type queryShape struct {
+	nodes    []Node
+	links    []Link
+	ts       TripleStore
+	nodeId   int
+	hasaIds  []int
+	hasaDirs []string
+}
+
+// OutputQueryShapeForIterator walks a clone of it and stores the resulting
+// node and link lists in outputMap under the keys "nodes" and "links".
+func OutputQueryShapeForIterator(it Iterator, ts TripleStore, outputMap *map[string]interface{}) {
+	qs := &queryShape{
+		nodes:    make([]Node, 0),
+		links:    make([]Link, 0),
+		ts:       ts,
+		nodeId:   1,
+		hasaIds:  make([]int, 0),
+		hasaDirs: make([]string, 0),
+	}
+
+	node := qs.MakeNode(it.Clone())
+	qs.AddNode(node)
+	(*outputMap)["nodes"] = qs.nodes
+	(*outputMap)["links"] = qs.links
+}
+
+// AddNode appends a copy of n to the node list.
+func (qs *queryShape) AddNode(n *Node) {
+	qs.nodes = append(qs.nodes, *n)
+}
+
+// AddLink appends a copy of l to the link list.
+func (qs *queryShape) AddLink(l *Link) {
+	qs.links = append(qs.links, *l)
+}
+
+// LastHasa returns the node id and direction on top of the hasa stack. When
+// the stack is empty it returns (0, "") instead of indexing out of range;
+// node ids start at 1, so 0 never names a real node.
+func (qs *queryShape) LastHasa() (int, string) {
+	if len(qs.hasaIds) == 0 || len(qs.hasaDirs) == 0 {
+		return 0, ""
+	}
+	return qs.hasaIds[len(qs.hasaIds)-1], qs.hasaDirs[len(qs.hasaDirs)-1]
+}
+
+// PushHasa pushes a hasa node id and its direction onto the stack.
+func (qs *queryShape) PushHasa(i int, s string) {
+	qs.hasaIds = append(qs.hasaIds, i)
+	qs.hasaDirs = append(qs.hasaDirs, s)
+}
+
+// RemoveHasa pops the most recently pushed hasa entry.
+func (qs *queryShape) RemoveHasa() {
+	qs.hasaIds = qs.hasaIds[:len(qs.hasaIds)-1]
+	qs.hasaDirs = qs.hasaDirs[:len(qs.hasaDirs)-1]
+}
+
+// StealNode merges right into left: values, tags and the link/fixed flags
+// are folded into left, and every recorded link that refers to right's id is
+// redirected to left's id.
+func (qs *queryShape) StealNode(left *Node, right *Node) {
+	left.Values = append(left.Values, right.Values...)
+	left.Tags = append(left.Tags, right.Tags...)
+	left.IsLinkNode = left.IsLinkNode || right.IsLinkNode
+	left.IsFixed = left.IsFixed || right.IsFixed
+	// Rewrite in place. Removing and re-appending the element while ranging
+	// over the same backing array shifts the remaining links under the loop
+	// and causes the link following a rewritten one to be skipped.
+	for i := range qs.links {
+		link := &qs.links[i]
+		if link.LinkNode == right.Id {
+			link.LinkNode = left.Id
+		}
+		if link.Source == right.Id {
+			link.Source = left.Id
+		}
+		if link.Target == right.Id {
+			link.Target = left.Id
+		}
+	}
+}
+
+// MakeNode builds the Node for it, recursing into sub-iterators and
+// recording any child nodes and links on qs as it goes. The returned node
+// itself is not added to qs; that is left to the caller.
+func (qs *queryShape) MakeNode(it Iterator) *Node {
+	n := Node{
+		Id:     qs.nodeId,
+		Tags:   make([]string, 0),
+		Values: make([]string, 0),
+	}
+	n.Tags = append(n.Tags, it.Tags()...)
+	for k := range it.FixedTags() {
+		n.Tags = append(n.Tags, k)
+	}
+
+	switch it.Type() {
+	case "and":
+		list := it.GetSubIterators()
+		for e := list.Front(); e != nil; e = e.Next() {
+			subit := e.Value.(Iterator)
+			qs.nodeId++
+			newNode := qs.MakeNode(subit)
+			// Everything but an "or" child is absorbed into this node.
+			if subit.Type() != "or" {
+				qs.StealNode(&n, newNode)
+			} else {
+				qs.AddNode(newNode)
+				qs.AddLink(&Link{Source: n.Id, Target: newNode.Id})
+			}
+		}
+	case "fixed":
+		n.IsFixed = true
+		// Drains the iterator; OutputQueryShapeForIterator passes a clone.
+		for {
+			val, more := it.Next()
+			if !more {
+				break
+			}
+			n.Values = append(n.Values, qs.ts.GetNameFor(val))
+		}
+	case "hasa":
+		hasa := it.(*HasaIterator)
+		qs.PushHasa(n.Id, hasa.direction)
+		qs.nodeId++
+		newNode := qs.MakeNode(hasa.primaryIt)
+		qs.AddNode(newNode)
+		qs.RemoveHasa()
+	case "or":
+		list := it.GetSubIterators()
+		for e := list.Front(); e != nil; e = e.Next() {
+			subit := e.Value.(Iterator)
+			qs.nodeId++
+			newNode := qs.MakeNode(subit)
+			// Nested "or" children are flattened into this node.
+			if subit.Type() == "or" {
+				qs.StealNode(&n, newNode)
+			} else {
+				qs.AddNode(newNode)
+				qs.AddLink(&Link{Source: n.Id, Target: newNode.Id})
+			}
+		}
+	case "linksto":
+		n.IsLinkNode = true
+		lto := it.(*LinksToIterator)
+		qs.nodeId++
+		newNode := qs.MakeNode(lto.primaryIt)
+		hasaID, hasaDir := qs.LastHasa()
+		if (hasaDir == "s" && lto.direction == "o") ||
+			(hasaDir == "o" && lto.direction == "s") {
+			// The linksto faces the opposite end of the enclosing hasa, so
+			// it becomes an edge between the hasa node and the new node.
+			qs.AddNode(newNode)
+			if hasaDir == "s" {
+				qs.AddLink(&Link{Source: hasaID, Target: newNode.Id, LinkNode: n.Id})
+			} else {
+				qs.AddLink(&Link{Source: newNode.Id, Target: hasaID, LinkNode: n.Id})
+			}
+		} else if lto.primaryIt.Type() == "fixed" {
+			qs.StealNode(&n, newNode)
+		} else {
+			qs.AddNode(newNode)
+		}
+	case "optional":
+		// Unsupported, for the moment
+		fallthrough
+	case "all":
+	}
+	return &n
+}
diff --git a/graph/query-shape_test.go b/graph/query-shape_test.go
new file mode 100644
index 0000000..b0d6950
--- /dev/null
+++ b/graph/query-shape_test.go
@@ -0,0 +1,124 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+import (
+	. "github.com/smartystreets/goconvey/convey"
+	"testing"
+)
+
+// buildHasaWithTag builds hasa("s", and(linksto("o", fixed{target}),
+// linksto("p", fixed{"status"}))), with tag attached to the object-side
+// fixed iterator.
+func buildHasaWithTag(ts TripleStore, tag string, target string) *HasaIterator {
+	fixedObj := ts.MakeFixed()
+	fixedPred := ts.MakeFixed()
+	fixedObj.AddValue(ts.GetIdFor(target))
+	fixedPred.AddValue(ts.GetIdFor("status"))
+	fixedObj.AddTag(tag)
+	lto1 := NewLinksToIterator(ts, fixedObj, "o")
+	lto2 := NewLinksToIterator(ts, fixedPred, "p")
+	and := NewAndIterator()
+	and.AddSubIterator(lto1)
+	and.AddSubIterator(lto2)
+	hasa := NewHasaIterator(ts, and, "s")
+	return hasa
+}
+
+func TestQueryShape(t *testing.T) {
+	var queryShape map[string]interface{}
+	ts := new(TestTripleStore)
+	ts.On("GetIdFor", "cool").Return(1)
+	ts.On("GetNameFor", 1).Return("cool")
+	ts.On("GetIdFor", "status").Return(2)
+	ts.On("GetNameFor", 2).Return("status")
+	ts.On("GetIdFor", "fun").Return(3)
+	ts.On("GetNameFor", 3).Return("fun")
+	ts.On("GetIdFor", "name").Return(4)
+	ts.On("GetNameFor", 4).Return("name")
+
+	Convey("Given a single linkage iterator's shape", t, func() {
+		queryShape = make(map[string]interface{})
+		hasa := buildHasaWithTag(ts, "tag", "cool")
+		hasa.AddTag("top")
+		OutputQueryShapeForIterator(hasa, ts, &queryShape)
+
+		Convey("It should have three nodes and one link", func() {
+			nodes := queryShape["nodes"].([]Node)
+			links := queryShape["links"].([]Link)
+			So(len(nodes), ShouldEqual, 3)
+			So(len(links), ShouldEqual, 1)
+		})
+
+		Convey("These nodes should be correctly tagged", func() {
+			nodes := queryShape["nodes"].([]Node)
+			So(nodes[0].Tags, ShouldResemble, []string{"tag"})
+			So(nodes[1].IsLinkNode, ShouldEqual, true)
+			So(nodes[2].Tags, ShouldResemble, []string{"top"})
+		})
+
+		Convey("The link should be correctly typed", func() {
+			nodes := queryShape["nodes"].([]Node)
+			links := queryShape["links"].([]Link)
+			So(links[0].Source, ShouldEqual, nodes[2].Id)
+			So(links[0].Target, ShouldEqual, nodes[0].Id)
+			So(links[0].LinkNode, ShouldEqual, nodes[1].Id)
+			So(links[0].Pred, ShouldEqual, 0)
+		})
+	})
+
+	Convey("Given a name-of-an-and-iterator's shape", t, func() {
+		queryShape = make(map[string]interface{})
+		hasa1 := buildHasaWithTag(ts, "tag1", "cool")
+		hasa1.AddTag("hasa1")
+		hasa2 := buildHasaWithTag(ts, "tag2", "fun")
+		// The "hasa2" tag belongs on hasa2; it was previously added to hasa1.
+		hasa2.AddTag("hasa2")
+		andInternal := NewAndIterator()
+		andInternal.AddSubIterator(hasa1)
+		andInternal.AddSubIterator(hasa2)
+		fixedPred := ts.MakeFixed()
+		fixedPred.AddValue(ts.GetIdFor("name"))
+		lto1 := NewLinksToIterator(ts, andInternal, "s")
+		lto2 := NewLinksToIterator(ts, fixedPred, "p")
+		and := NewAndIterator()
+		and.AddSubIterator(lto1)
+		and.AddSubIterator(lto2)
+		hasa := NewHasaIterator(ts, and, "o")
+		OutputQueryShapeForIterator(hasa, ts, &queryShape)
+
+		Convey("It should have seven nodes and three links", func() {
+			nodes := queryShape["nodes"].([]Node)
+			links := queryShape["links"].([]Link)
+			So(len(nodes), ShouldEqual, 7)
+			So(len(links), ShouldEqual, 3)
+		})
+
+		Convey("Three of the nodes are link nodes, four aren't", func() {
+			nodes := queryShape["nodes"].([]Node)
+			count := 0
+			for _, node := range nodes {
+				if node.IsLinkNode {
+					count++
+				}
+			}
+			So(count, ShouldEqual, 3)
+		})
+
+		Convey("These nodes should be correctly tagged", nil)
+	})
+}
diff --git a/graph/result-tree-evaluator.go b/graph/result-tree-evaluator.go
new file mode 100644
index 0000000..e75cf56
--- /dev/null
+++ b/graph/result-tree-evaluator.go
@@ -0,0 +1,70 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+import (
+	"container/list"
+	"fmt"
+)
+
+// ResultTree pairs one result value with the list of result trees that
+// were attached to it through AddSubtree.
+type ResultTree struct {
+	result   TSVal
+	subtrees *list.List
+}
+
+// NewResultTree returns a tree holding result and an empty subtree list.
+func NewResultTree(result TSVal) *ResultTree {
+	return &ResultTree{
+		result:   result,
+		subtrees: list.New(),
+	}
+}
+
+// ToString renders the tree as "(result sub1 sub2 ...)", formatting the
+// result with %d and each subtree recursively.
+func (tree *ResultTree) ToString() string {
+	text := fmt.Sprintf("(%d", tree.result)
+	for elem := tree.subtrees.Front(); elem != nil; elem = elem.Next() {
+		child := elem.Value.(*ResultTree)
+		text += fmt.Sprintf(" %s", child.ToString())
+	}
+	return text + ")"
+}
+
+// AddSubtree appends sub as the last subtree of tree.
+func (tree *ResultTree) AddSubtree(sub *ResultTree) {
+	tree.subtrees.PushBack(sub)
+}
+
+// StringResultTreeEvaluator drains it and returns one line per result tree.
+// Trees obtained through NextResult are prefixed with a single space.
+func StringResultTreeEvaluator(it Iterator) string {
+	rendered := ""
+	for {
+		if _, more := it.Next(); !more {
+			return rendered
+		}
+		rendered += it.GetResultTree().ToString() + "\n"
+		for it.NextResult() {
+			rendered += " " + it.GetResultTree().ToString() + "\n"
+		}
+	}
+}
+
+// PrintResultTreeEvaluator prints the output of StringResultTreeEvaluator.
+func PrintResultTreeEvaluator(it Iterator) {
+	fmt.Print(StringResultTreeEvaluator(it))
+}
diff --git a/graph/result-tree-evaluator_test.go b/graph/result-tree-evaluator_test.go
new file mode 100644
index 0000000..349bc08
--- /dev/null
+++ b/graph/result-tree-evaluator_test.go
@@ -0,0 +1,42 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+import (
+	"testing"
+)
+
+// TestSingleIterator checks the rendering of a lone all-iterator over 1..3:
+// one parenthesised value per line.
+func TestSingleIterator(t *testing.T) {
+	all := NewInt64AllIterator(1, 3)
+	result := StringResultTreeEvaluator(all)
+	expected := "(1)\n(2)\n(3)\n"
+	if expected != result {
+		t.Errorf("Expected \"%s\" got \"%s\"", expected, result)
+	}
+}
+
+// TestAndIterator checks that and-ing 1..3 with 3..5 renders the single
+// shared value 3 with one subtree per sub-iterator.
+func TestAndIterator(t *testing.T) {
+	all1 := NewInt64AllIterator(1, 3)
+	all2 := NewInt64AllIterator(3, 5)
+	and := NewAndIterator()
+	and.AddSubIterator(all1)
+	and.AddSubIterator(all2)
+
+	result := StringResultTreeEvaluator(and)
+	expected := "(3 (3) (3))\n"
+	if expected != result {
+		t.Errorf("Expected \"%s\" got \"%s\"", expected, result)
+	}
+}
diff --git a/graph/session.go b/graph/session.go
new file mode 100644
index 0000000..03a137b
--- /dev/null
+++ b/graph/session.go
@@ -0,0 +1,45 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+// Defines the graph session interface general to all query languages.
+
+// ParseResult is the status value returned by InputParses.
+type ParseResult int
+
+// The sexp session in this patch returns ParseMore while parentheses are
+// still open, Parsed when the parser yields a non-empty tree and ParseFail
+// otherwise. NOTE(review): other query languages presumably follow the same
+// convention -- confirm against their implementations.
+const (
+	Parsed ParseResult = iota
+	ParseMore
+	ParseFail
+)
+
+// Session is the interface a query language exposes to interactive callers.
+type Session interface {
+	// Return whether the string is a valid expression.
+	InputParses(string) (ParseResult, error)
+	ExecInput(string, chan interface{}, int)
+	ToText(interface{}) string
+	ToggleDebug()
+}
+
+// HttpSession is the session interface for callers that collect results as
+// JSON instead of text.
+type HttpSession interface {
+	// Return whether the string is a valid expression.
+	InputParses(string) (ParseResult, error)
+	// Runs the query and returns individual results on the channel.
+	ExecInput(string, chan interface{}, int)
+	GetQuery(string, chan map[string]interface{})
+	BuildJson(interface{})
+	GetJson() (interface{}, error)
+	ClearJson()
+	ToggleDebug()
+}
diff --git a/graph/sexp/parser.go b/graph/sexp/parser.go
new file mode 100644
index 0000000..2efcf49
--- /dev/null
+++ b/graph/sexp/parser.go
@@ -0,0 +1,271 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sexp
+
+import (
+	"github.com/badgerodon/peg"
+
+	"github.com/google/cayley/graph"
+)
+
+// BuildIteratorTreeForQuery parses query and converts the resulting
+// expression tree into an iterator tree over ts.
+func BuildIteratorTreeForQuery(ts graph.TripleStore, query string) graph.Iterator {
+	tree := parseQuery(query)
+	return buildIteratorTree(tree, ts)
+}
+
+// ParseString parses input and returns the string form of the expression
+// tree. The tests in this patch rely on "()" yielding the empty string.
+func ParseString(input string) string {
+	return parseQuery(input).String()
+}
+
+// parseQuery builds the PEG grammar for the s-expression query language and
+// runs it over input, returning the parser's expression tree.
+func parseQuery(input string) *peg.ExpressionTree {
+	parser := peg.NewParser()
+
+	start := parser.NonTerminal("Start")
+	whitespace := parser.NonTerminal("Whitespace")
+	quotedString := parser.NonTerminal("QuotedString")
+	rootConstraint := parser.NonTerminal("RootConstraint")
+
+	constraint := parser.NonTerminal("Constraint")
+	colonIdentifier := parser.NonTerminal("ColonIdentifier")
+	variable := parser.NonTerminal("Variable")
+	identifier := parser.NonTerminal("Identifier")
+	fixedNode := parser.NonTerminal("FixedNode")
+	nodeIdent := parser.NonTerminal("NodeIdentifier")
+	predIdent := parser.NonTerminal("PredIdentifier")
+	reverse := parser.NonTerminal("Reverse")
+	predKeyword := parser.NonTerminal("PredicateKeyword")
+	optional := parser.NonTerminal("OptionalKeyword")
+
+	start.Expression = rootConstraint
+
+	whitespace.Expression = parser.OneOrMore(
+		parser.OrderedChoice(
+			parser.Terminal(' '),
+			parser.Terminal('\t'),
+			parser.Terminal('\n'),
+			parser.Terminal('\r'),
+		),
+	)
+
+	quotedString.Expression = parser.Sequence(
+		parser.Terminal('"'),
+		parser.OneOrMore(
+			parser.OrderedChoice(
+				parser.Range('0', '9'),
+				parser.Range('a', 'z'),
+				parser.Range('A', 'Z'),
+				parser.Terminal('_'),
+				parser.Terminal('/'),
+				parser.Terminal(':'),
+				parser.Terminal(' '),
+				parser.Terminal('\''),
+			),
+		),
+		parser.Terminal('"'),
+	)
+
+	predKeyword.Expression = parser.OrderedChoice(
+		optional,
+	)
+
+	optional.Expression = parser.Sequence(
+		parser.Terminal('o'),
+		parser.Terminal('p'),
+		parser.Terminal('t'),
+		parser.Terminal('i'),
+		parser.Terminal('o'),
+		parser.Terminal('n'),
+		parser.Terminal('a'),
+		parser.Terminal('l'),
+	)
+
+	identifier.Expression = parser.OneOrMore(
+		parser.OrderedChoice(
+			parser.Range('0', '9'),
+			parser.Range('a', 'z'),
+			parser.Range('A', 'Z'),
+			parser.Terminal('_'),
+			parser.Terminal('.'),
+			parser.Terminal('/'),
+			parser.Terminal(':'),
+			parser.Terminal('#'),
+		),
+	)
+
+	reverse.Expression = parser.Terminal('!')
+
+	variable.Expression = parser.Sequence(
+		parser.Terminal('$'),
+		identifier,
+	)
+
+	colonIdentifier.Expression = parser.Sequence(
+		parser.Terminal(':'),
+		identifier,
+	)
+
+	fixedNode.Expression = parser.OrderedChoice(
+		colonIdentifier,
+		quotedString,
+	)
+
+	nodeIdent.Expression = parser.OrderedChoice(
+		variable,
+		fixedNode,
+	)
+
+	predIdent.Expression = parser.Sequence(
+		parser.Optional(reverse),
+		parser.OrderedChoice(
+			nodeIdent,
+			constraint,
+		),
+	)
+
+	constraint.Expression = parser.Sequence(
+		parser.Terminal('('),
+		parser.Optional(whitespace),
+		predIdent,
+		parser.Optional(whitespace),
+		parser.Optional(predKeyword),
+		parser.Optional(whitespace),
+		parser.OrderedChoice(
+			nodeIdent,
+			rootConstraint,
+		),
+		parser.Optional(whitespace),
+		parser.Terminal(')'),
+	)
+
+	rootConstraint.Expression = parser.Sequence(
+		parser.Terminal('('),
+		parser.Optional(whitespace),
+		nodeIdent,
+		parser.Optional(whitespace),
+		parser.ZeroOrMore(parser.Sequence(
+			constraint,
+			parser.Optional(whitespace),
+		)),
+		parser.Terminal(')'),
+	)
+
+	tree := parser.Parse(input)
+	return tree
+}
+
+// getIdentString concatenates the leaf values below tree, skipping
+// double-quote characters.
+func getIdentString(tree *peg.ExpressionTree) string {
+	if len(tree.Children) == 0 {
+		if tree.Value == '"' {
+			return ""
+		}
+		return string(tree.Value)
+	}
+	out := ""
+	for _, child := range tree.Children {
+		out += getIdentString(child)
+	}
+	return out
+}
+
+// buildIteratorTree converts one node of the parsed expression tree into the
+// matching iterator. Node names it does not handle yield a NullIterator.
+func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Iterator {
+	switch tree.Name {
+	case "Start":
+		return buildIteratorTree(tree.Children[0], ts)
+	case "NodeIdentifier":
+		nodeID := getIdentString(tree)
+		if tree.Children[0].Name == "Variable" {
+			// A variable matches every node and tags it with its own name.
+			allIt := ts.GetNodesAllIterator()
+			allIt.AddTag(nodeID)
+			return allIt
+		}
+		n := nodeID
+		if tree.Children[0].Children[0].Name == "ColonIdentifier" {
+			// Drop the leading ':'.
+			n = nodeID[1:]
+		}
+		fixed := ts.MakeFixed()
+		fixed.AddValue(ts.GetIdFor(n))
+		return fixed
+	case "PredIdentifier":
+		i := 0
+		if tree.Children[0].Name == "Reverse" {
+			// The reversal itself is handled by the "Constraint" case;
+			// here the marker is only skipped.
+			i++
+		}
+		it := buildIteratorTree(tree.Children[i], ts)
+		return graph.NewLinksToIterator(ts, it, "p")
+	case "RootConstraint":
+		and := graph.NewAndIterator()
+		for _, c := range tree.Children {
+			switch c.Name {
+			case "NodeIdentifier", "Constraint":
+				and.AddSubIterator(buildIteratorTree(c, ts))
+			}
+		}
+		return and
+	case "Constraint":
+		topLevelDir := "s"
+		subItDir := "o"
+		subAnd := graph.NewAndIterator()
+		isOptional := false
+		for _, c := range tree.Children {
+			switch c.Name {
+			case "PredIdentifier":
+				if c.Children[0].Name == "Reverse" {
+					topLevelDir = "o"
+					subItDir = "s"
+				}
+				subAnd.AddSubIterator(buildIteratorTree(c, ts))
+			case "PredicateKeyword":
+				if c.Children[0].Name == "OptionalKeyword" {
+					isOptional = true
+				}
+			case "NodeIdentifier", "RootConstraint":
+				it := buildIteratorTree(c, ts)
+				subAnd.AddSubIterator(graph.NewLinksToIterator(ts, it, subItDir))
+			}
+		}
+		hasa := graph.NewHasaIterator(ts, subAnd, topLevelDir)
+		if isOptional {
+			return graph.NewOptionalIterator(hasa)
+		}
+		return hasa
+	default:
+		return &graph.NullIterator{}
+	}
+}
diff --git a/graph/sexp/parser_test.go b/graph/sexp/parser_test.go
new file mode 100644
index 0000000..f72239c
--- /dev/null
+++ b/graph/sexp/parser_test.go
@@ -0,0 +1,129 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sexp
+
+import (
+	"testing"
+
+	. "github.com/smartystreets/goconvey/convey"
+
+	"github.com/google/cayley/graph"
+	"github.com/google/cayley/graph/memstore"
+)
+
+// TestBadParse checks that "()" does not produce a parse tree string.
+func TestBadParse(t *testing.T) {
+	str := ParseString("()")
+	if str != "" {
+		t.Errorf("It parsed! Got \"%s\"", str)
+	}
+}
+
+func TestParseSexpWithMemstore(t *testing.T) {
+	Convey("With a Memstore", t, func() {
+		ts := memstore.NewMemTripleStore()
+
+		Convey("It should parse an empty query", func() {
+			it := BuildIteratorTreeForQuery(ts, "()")
+			So(it.Type(), ShouldEqual, "null")
+		})
+
+		Convey("It should get a single triple linkage", func() {
+			ts.AddTriple(graph.MakeTriple("i", "can", "win", ""))
+			query := "($a (:can \"win\"))"
+			So(len(query), ShouldEqual, 17)
+			it := BuildIteratorTreeForQuery(ts, query)
+			So(it.Type(), ShouldEqual, "and")
+			out, ok := it.Next()
+			So(ok, ShouldBeTrue)
+			So(out, ShouldEqual, ts.GetIdFor("i"))
+		})
+
+		Convey("It can get an internal linkage", func() {
+			ts.AddTriple(graph.MakeTriple("i", "can", "win", ""))
+			query := "(\"i\" (:can $a))"
+			it := BuildIteratorTreeForQuery(ts, query)
+			So(it.Type(), ShouldEqual, "and")
+			out, ok := it.Next()
+			So(ok, ShouldBeTrue)
+			So(out, ShouldEqual, ts.GetIdFor("i"))
+		})
+
+	})
+}
+
+// TestTreeConstraintParse runs a nested constraint query and expects "i" as
+// the first result.
+func TestTreeConstraintParse(t *testing.T) {
+	ts := memstore.NewMemTripleStore()
+	ts.AddTriple(graph.MakeTriple("i", "like", "food", ""))
+	ts.AddTriple(graph.MakeTriple("food", "is", "good", ""))
+	query := "(\"i\"\n" +
+		"(:like\n" +
+		"($a (:is :good))))"
+	it := BuildIteratorTreeForQuery(ts, query)
+	if it.Type() != "and" {
+		// Errorf, not Error: the message contains a formatting verb.
+		t.Errorf("Odd iterator tree. Got: %s", it.DebugString(0))
+	}
+	out, ok := it.Next()
+	if !ok {
+		t.Error("Got no results")
+	}
+	if out != ts.GetIdFor("i") {
+		t.Errorf("Got %d, expected %d", out, ts.GetIdFor("i"))
+	}
+}
+
+// TestTreeConstraintTagParse runs the same query and checks that the
+// variable $a is tagged with "food".
+func TestTreeConstraintTagParse(t *testing.T) {
+	ts := memstore.NewMemTripleStore()
+	ts.AddTriple(graph.MakeTriple("i", "like", "food", ""))
+	ts.AddTriple(graph.MakeTriple("food", "is", "good", ""))
+	query := "(\"i\"\n" +
+		"(:like\n" +
+		"($a (:is :good))))"
+	it := BuildIteratorTreeForQuery(ts, query)
+	_, ok := it.Next()
+	if !ok {
+		t.Error("Got no results")
+	}
+	tags := make(map[string]graph.TSVal)
+	it.TagResults(&tags)
+	if ts.GetNameFor(tags["$a"]) != "food" {
+		t.Errorf("Got %s, expected food", ts.GetNameFor(tags["$a"]))
+	}
+
+}
+
+// TestMultipleConstraintParse expects exactly one node, "i", to satisfy both
+// constraints.
+func TestMultipleConstraintParse(t *testing.T) {
+	ts := memstore.NewMemTripleStore()
+	ts.AddTriple(graph.MakeTriple("i", "like", "food", ""))
+	ts.AddTriple(graph.MakeTriple("i", "like", "beer", ""))
+	ts.AddTriple(graph.MakeTriple("you", "like", "beer", ""))
+	query := "($a \n" +
+		"(:like :beer)\n" +
+		"(:like \"food\"))"
+	it := BuildIteratorTreeForQuery(ts, query)
+	if it.Type() != "and" {
+		// Errorf, not Error: the message contains a formatting verb.
+		t.Errorf("Odd iterator tree. Got: %s", it.DebugString(0))
+	}
+	out, ok := it.Next()
+	if !ok {
+		t.Error("Got no results")
+	}
+	if out != ts.GetIdFor("i") {
+		t.Errorf("Got %d, expected %d", out, ts.GetIdFor("i"))
+	}
+	_, ok = it.Next()
+	if ok {
+		t.Error("Too many results")
+	}
+}
diff --git a/graph/sexp/sexp-session.go b/graph/sexp/sexp-session.go
new file mode 100644
index 0000000..7065e02
--- /dev/null
+++ b/graph/sexp/sexp-session.go
@@ -0,0 +1,121 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sexp
+
+// Defines a running session of the sexp query language.
+
+import (
+	"errors"
+	"fmt"
+	"sort"
+
+	"github.com/google/cayley/graph"
+)
+
+// SexpSession runs s-expression queries against a single triple store.
+type SexpSession struct {
+	ts    graph.TripleStore
+	debug bool
+}
+
+// NewSexpSession returns a session bound to inputTripleStore with debug
+// output switched off.
+func NewSexpSession(inputTripleStore graph.TripleStore) *SexpSession {
+	return &SexpSession{ts: inputTripleStore}
+}
+
+// ToggleDebug flips whether ExecInput prints the iterator tree before
+// running it.
+func (s *SexpSession) ToggleDebug() {
+	s.debug = !s.debug
+}
+
+// InputParses classifies input. It returns ParseFail with an error when a
+// ')' has no matching '(' or the parser yields nothing, ParseMore when
+// parentheses are still open, and Parsed otherwise.
+func (s *SexpSession) InputParses(input string) (graph.ParseResult, error) {
+	var parenDepth int
+	for i, x := range input {
+		if x == '(' {
+			parenDepth++
+		}
+		if x == ')' {
+			parenDepth--
+			if parenDepth < 0 {
+				// Show up to ten bytes of context before the stray paren.
+				start := 0
+				if (i - 10) > start {
+					start = i - 10
+				}
+				return graph.ParseFail, fmt.Errorf("Too many close parens at char %d: %s", i, input[start:i])
+			}
+		}
+	}
+	if parenDepth > 0 {
+		return graph.ParseMore, nil
+	}
+	if len(ParseString(input)) > 0 {
+		return graph.Parsed, nil
+	}
+	return graph.ParseFail, errors.New("Invalid Syntax")
+}
+
+// ExecInput builds and optimizes the iterator tree for input, then sends a
+// *map[string]graph.TSVal of tag bindings on out for every result. out is
+// always closed on return. A limit of -1 disables the result limit.
+func (s *SexpSession) ExecInput(input string, out chan interface{}, limit int) {
+	defer close(out)
+
+	it := BuildIteratorTreeForQuery(s.ts, input)
+	if newIt, changed := it.Optimize(); changed {
+		it = newIt
+	}
+
+	if s.debug {
+		fmt.Println(it.DebugString(0))
+	}
+
+	nResults := 0
+	// emit sends the current bindings and reports whether to stop. The
+	// threshold is unchanged: it stops once nResults exceeds limit.
+	// NOTE(review): this delivers limit+1 results; confirm whether callers
+	// expect exactly limit.
+	emit := func() bool {
+		tags := make(map[string]graph.TSVal)
+		it.TagResults(&tags)
+		out <- &tags
+		nResults++
+		return limit != -1 && nResults > limit
+	}
+
+	for {
+		if _, ok := it.Next(); !ok {
+			return
+		}
+		if emit() {
+			return
+		}
+		for it.NextResult() {
+			// Returning here stops the whole query. A bare break would only
+			// leave this inner loop and let the outer loop send one more.
+			if emit() {
+				return
+			}
+		}
+	}
+}
+
+// ToText renders one result produced by ExecInput as a "****" header
+// followed by "tag : name" lines in sorted tag order, skipping the tag "$_".
+func (s *SexpSession) ToText(result interface{}) string {
+	out := fmt.Sprintln("****")
+	tags := result.(*map[string]graph.TSVal)
+	tagKeys := make([]string, 0, len(*tags))
+	for k := range *tags {
+		tagKeys = append(tagKeys, k)
+	}
+	sort.Strings(tagKeys)
+	for _, k := range tagKeys {
+		if k == "$_" {
+			continue
+		}
+		out += fmt.Sprintf("%s : %s\n", k, s.ts.GetNameFor((*tags)[k]))
+	}
+	return out
+}
diff --git a/graph/triple.go b/graph/triple.go
new file mode 100644
index 0000000..5abdb59
--- /dev/null
+++ b/graph/triple.go
@@ -0,0 +1,109 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+// Defines the struct which makes the TripleStore possible -- the triple.
+//
+// At its heart, it consists of three fields -- Subject, Predicate, and Object.
+// Three IDs that relate to each other. That's all there is to it. The triples
+// are the links in the graph, and the existence of node IDs is defined by the
+// fact that some triple in the graph mentions them.
+//
+// This means that a complete representation of the graph is equivalent to a
+// list of triples. The rest is just indexing for speed.
+//
+// Adding fields to the triple is not to be taken lightly. You'll see I mention
+// provenance, but don't as yet use it in any backing store.
In general, there
+// can be features that can be turned on or off for any store, but I haven't
+// decided how to allow/disallow them yet. Another such example would be to add
+// a forward and reverse index field -- forward being "order the list of
+// objects pointed at by this subject with this predicate" such as first and
+// second children, top billing, what have you.
+//
+// There will never be that much in this file except for the definition, but
+// the consequences are not to be taken lightly. But do suggest cool features!
+
+import (
+	"fmt"
+	"reflect"
+)
+
+// Triple is the subject/predicate/object record used throughout, plus an
+// optional provenance string.
+type Triple struct {
+	Sub        string `json:"subject"`
+	Pred       string `json:"predicate"`
+	Obj        string `json:"object"`
+	Provenance string `json:"provenance,omitempty"`
+}
+
+// NewTriple returns a pointer to an empty Triple.
+func NewTriple() *Triple {
+	return new(Triple)
+}
+
+// MakeTriple returns a pointer to a Triple filled with the given fields.
+func MakeTriple(sub string, pred string, obj string, provenance string) *Triple {
+	return &Triple{
+		Sub:        sub,
+		Pred:       pred,
+		Obj:        obj,
+		Provenance: provenance,
+	}
+}
+
+// List of the valid directions of a triple.
+// TODO(barakmich): Replace all instances of "dir string" in the codebase
+// with an enum of valid directions, to make this less stringly typed.
+var TripleDirections = [4]string{"s", "p", "o", "c"}
+
+// Get returns the field selected by dir: "s", "p", "o", or "c"/"prov" for
+// the provenance. Any other direction panics.
+func (t *Triple) Get(dir string) string {
+	switch dir {
+	case "s":
+		return t.Sub
+	case "p":
+		return t.Pred
+	case "prov", "c":
+		return t.Provenance
+	case "o":
+		return t.Obj
+	}
+	panic(fmt.Sprintf("No Such Triple Direction, %s", dir))
+}
+
+// Equals reports whether t and other are deeply equal.
+func (t *Triple) Equals(other *Triple) bool {
+	return reflect.DeepEqual(t, other)
+}
+
+// ToString pretty-prints the triple as "sub -- pred -> obj" followed by a
+// newline.
+func (t *Triple) ToString() string {
+	return fmt.Sprintf("%s -- %s -> %s\n", t.Sub, t.Pred, t.Obj)
+}
+
+// IsValid reports whether subject, predicate and object are all non-empty.
+func (t *Triple) IsValid() bool {
+	return t.Sub != "" && t.Pred != "" && t.Obj != ""
+}
+
+// Prints a triple in N-Triple format.
+func (t *Triple) ToNTriple() string {
+	// The provenance field is only written when it is non-empty.
+	if t.Provenance == "" {
+		//TODO(barakmich): Proper escaping.
+		return fmt.Sprintf("%s %s %s .", t.Sub, t.Pred, t.Obj)
+	} else {
+		return fmt.Sprintf("%s %s %s %s .", t.Sub, t.Pred, t.Obj, t.Provenance)
+	}
+}
diff --git a/graph/triplestore.go b/graph/triplestore.go
new file mode 100644
index 0000000..1e6d9ce
--- /dev/null
+++ b/graph/triplestore.go
@@ -0,0 +1,119 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+// Defines the TripleStore interface. Every backing store must implement at
+// least this interface.
+//
+// Most of these are pretty straightforward. As long as we can surface this
+// interface, the rest of the stack will "just work" and we can connect to any
+// triple backing store we prefer.
+
+import (
+	"github.com/barakmich/glog"
+)
+
+// Defines an opaque "triple store value" type. However the backend wishes to
+// implement it, a TSVal is merely a token to a triple or a node that the backing
+// store itself understands, and the base iterators pass around.
+//
+// For example, in a very traditional, graphd-style graph, these are int64s
+// (guids of the primitives). In a very direct sort of graph, these could be
+// pointers to structs, or merely triples, or whatever works best for the
+// backing store.
+type TSVal interface{}
+
+// TripleStore is the interface every backing store implements.
+type TripleStore interface {
+	// Add a triple to the store.
+	AddTriple(*Triple)
+
+	// Add a set of triples to the store, atomically if possible.
+	AddTripleSet([]*Triple)
+
+	// Removes a triple matching the given one from the database,
+	// if it exists. Does nothing otherwise.
+	RemoveTriple(*Triple)
+
+	// Given an opaque token, returns the triple for that token from the store.
+	GetTriple(TSVal) *Triple
+
+	// Given a direction and a token, creates an iterator of links which have
+	// that node token in that directional field.
+	GetTripleIterator(string, TSVal) Iterator
+
+	// Returns an iterator enumerating all nodes in the graph.
+	GetNodesAllIterator() Iterator
+
+	// Returns an iterator enumerating all links in the graph.
+	GetTriplesAllIterator() Iterator
+
+	// Given a node ID, return the opaque token used by the TripleStore
+	// to represent that id.
+	GetIdFor(string) TSVal
+
+	// Given an opaque token, return the node that it represents.
+	GetNameFor(TSVal) string
+
+	// Returns the number of triples currently stored.
+	Size() int64
+
+	// Creates a Fixed iterator which can compare TSVals
+	MakeFixed() *FixedIterator
+
+	// Optimize an iterator in the context of the triple store.
+	// Suppose we have a better index for the passed tree; this
+	// gives the TripleStore the opportunity to replace it
+	// with a more efficient iterator.
+	OptimizeIterator(it Iterator) (Iterator, bool)
+
+	// Close the triple store and clean up. (Flush to disk, cleanly
+	// sever connections, etc)
+	Close()
+
+	// Convenience function for speed. Given a triple token and a direction
+	// return the node token for that direction. Sometimes, a TripleStore
+	// can do this without going all the way to the backing store, and
+	// gives the TripleStore the opportunity to make this optimization.
+	//
+	// Iterators will call this. At worst, a valid implementation is
+	// self.GetIdFor(self.GetTriple(triple_id).Get(dir))
+	GetTripleDirection(triple_id TSVal, dir string) TSVal
+}
+
+// OptionsDict is a loosely typed bag of options; the log messages below
+// refer to its values as config parameters.
+type OptionsDict map[string]interface{}
+
+// GetIntKey returns the value stored under key converted to int, and true,
+// when that value is a float64. A missing key yields (0, false). Any other
+// value type is reported through glog.Fatalln.
+// NOTE(review): only float64 is accepted, presumably because the options
+// come from decoded JSON -- confirm against the config loader.
+func (d OptionsDict) GetIntKey(key string) (int, bool) {
+	if val, ok := d[key]; ok {
+		switch vv := val.(type) {
+		case float64:
+			return int(vv), true
+		default:
+			glog.Fatalln("Invalid", key, "parameter type from config.")
+		}
+	}
+	return 0, false
+}
+
+// GetStringKey returns the value stored under key, and true, when that value
+// is a string. A missing key yields ("", false). Any other value type is
+// reported through glog.Fatalln.
+func (d OptionsDict) GetStringKey(key string) (string, bool) {
+	if val, ok := d[key]; ok {
+		switch vv := val.(type) {
+		case string:
+			return vv, true
+		default:
+			glog.Fatalln("Invalid", key, "parameter type from config.")
+		}
+	}
+	return "", false
+}
diff --git a/graph/value-comparison-iterator.go b/graph/value-comparison-iterator.go
new file mode 100644
index 0000000..ecfe86e
--- /dev/null
+++ b/graph/value-comparison-iterator.go
@@ -0,0 +1,193 @@
+// Copyright 2014 The Cayley Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package graph
+
+// "Value Comparison" is a unary operator -- a filter across the values in the
+// relevant subiterator.
+//
+// This is hugely useful for things like provenance, but value ranges in general
+// come up from time to time. At *worst* we're as big as our underlying iterator.
+// At best, we're the null iterator.
+//
+// This is ripe for backend-side optimization.
If you can run a value iterator, +// from a sorted set -- some sort of value index, then go for it. +// +// In MQL terms, this is the [{"age>=": 21}] concept. + +import ( + "fmt" + "log" + "strconv" + "strings" +) + +type ComparisonOperator int + +const ( + kCompareLT ComparisonOperator = iota + kCompareLTE + kCompareGT + kCompareGTE + // Why no Equals? Because that's usually an AndIterator. +) + +type ValueComparisonIterator struct { + BaseIterator + subIt Iterator + op ComparisonOperator + comparisonValue interface{} + ts TripleStore +} + +func NewValueComparisonIterator( + subIt Iterator, + operator ComparisonOperator, + value interface{}, + ts TripleStore) *ValueComparisonIterator { + + var vc ValueComparisonIterator + BaseIteratorInit(&vc.BaseIterator) + vc.subIt = subIt + vc.op = operator + vc.comparisonValue = value + vc.ts = ts + return &vc +} + +// Here's the non-boilerplate part of the ValueComparison iterator. Given a value +// and our operator, determine whether or not we meet the requirement. +func (vc *ValueComparisonIterator) doComparison(val TSVal) bool { + //TODO(barakmich): Implement string comparison. 
+ nodeStr := vc.ts.GetNameFor(val) + switch cVal := vc.comparisonValue.(type) { + case int: + cInt := int64(cVal) + intVal, err := strconv.ParseInt(nodeStr, 10, 64) + if err != nil { + return false + } + return RunIntOp(intVal, vc.op, cInt) + case int64: + intVal, err := strconv.ParseInt(nodeStr, 10, 64) + if err != nil { + return false + } + return RunIntOp(intVal, vc.op, cVal) + default: + return true + } +} + +func (vc *ValueComparisonIterator) Close() { + vc.subIt.Close() +} + +func RunIntOp(a int64, op ComparisonOperator, b int64) bool { + switch op { + case kCompareLT: + return a < b + case kCompareLTE: + return a <= b + case kCompareGT: + return a > b + case kCompareGTE: + return a >= b + default: + log.Fatal("Unknown operator type") + return false + } +} + +func (vc *ValueComparisonIterator) Reset() { + vc.subIt.Reset() +} + +func (vc *ValueComparisonIterator) Clone() Iterator { + out := NewValueComparisonIterator(vc.subIt.Clone(), vc.op, vc.comparisonValue, vc.ts) + out.CopyTagsFrom(vc) + return out +} + +func (vc *ValueComparisonIterator) Next() (TSVal, bool) { + var val TSVal + var ok bool + for { + val, ok = vc.subIt.Next() + if !ok { + return nil, false + } + if vc.doComparison(val) { + break + } + } + vc.Last = val + return val, ok +} + +func (vc *ValueComparisonIterator) NextResult() bool { + for { + hasNext := vc.subIt.NextResult() + if !hasNext { + return false + } + if vc.doComparison(vc.subIt.LastResult()) { + return true + } + } + vc.Last = vc.subIt.LastResult() + return true +} + +func (vc *ValueComparisonIterator) Check(val TSVal) bool { + if !vc.doComparison(val) { + return false + } + return vc.subIt.Check(val) +} + +// If we failed the check, then the subiterator should not contribute to the result +// set. Otherwise, go ahead and tag it. +func (vc *ValueComparisonIterator) TagResults(out *map[string]TSVal) { + vc.BaseIterator.TagResults(out) + vc.subIt.TagResults(out) +} + +// Registers the value-comparison iterator. 
+func (vc *ValueComparisonIterator) Type() string { return "value-comparison" } + +// Prints the value-comparison and its subiterator. +func (vc *ValueComparisonIterator) DebugString(indent int) string { + return fmt.Sprintf("%s(%s\n%s)", + strings.Repeat(" ", indent), + vc.Type(), vc.subIt.DebugString(indent+4)) +} + +// There's nothing to optimize, locally, for a value-comparison iterator. +// Replace the underlying iterator if need be. +// potentially replace it. +func (vc *ValueComparisonIterator) Optimize() (Iterator, bool) { + newSub, changed := vc.subIt.Optimize() + if changed { + vc.subIt.Close() + vc.subIt = newSub + } + return vc, false +} + +// We're only as expensive as our subiterator. +// Again, optimized value comparison iterators should do better. +func (vc *ValueComparisonIterator) GetStats() *IteratorStats { + return vc.subIt.GetStats() +} diff --git a/graph/value-comparison-iterator_test.go b/graph/value-comparison-iterator_test.go new file mode 100644 index 0000000..23c795d --- /dev/null +++ b/graph/value-comparison-iterator_test.go @@ -0,0 +1,126 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package graph + +import ( + "testing" +) + +func SetupMockTripleStore(nameMap map[string]int) *TestTripleStore { + ts := new(TestTripleStore) + for k, v := range nameMap { + ts.On("GetIdFor", k).Return(v) + ts.On("GetNameFor", v).Return(k) + } + return ts +} + +func SimpleValueTripleStore() *TestTripleStore { + ts := SetupMockTripleStore(map[string]int{ + "0": 0, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + }) + return ts +} + +func BuildFixedIterator() *FixedIterator { + fixed := newFixedIterator() + fixed.AddValue(0) + fixed.AddValue(1) + fixed.AddValue(2) + fixed.AddValue(3) + fixed.AddValue(4) + return fixed +} + +func checkIteratorContains(ts TripleStore, it Iterator, expected []string, t *testing.T) { + var actual []string + actual = nil + for { + val, ok := it.Next() + if !ok { + break + } + actual = append(actual, ts.GetNameFor(val)) + } + actualSet := actual[:] + for _, a := range expected { + found := false + for j, b := range actualSet { + if a == b { + actualSet = append(actualSet[:j], actualSet[j+1:]...) 
+ found = true + break + } + } + if !found { + t.Error("Couldn't find", a, "in actual output.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) + return + } + } + if len(actualSet) != 0 { + t.Error("Actual output has more than expected.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) + } +} + +func TestWorkingIntValueComparison(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewValueComparisonIterator(fixed, kCompareLT, int64(3), ts) + checkIteratorContains(ts, vc, []string{"0", "1", "2"}, t) +} + +func TestFailingIntValueComparison(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewValueComparisonIterator(fixed, kCompareLT, int64(0), ts) + checkIteratorContains(ts, vc, []string{}, t) +} + +func TestWorkingGT(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewValueComparisonIterator(fixed, kCompareGT, int64(2), ts) + checkIteratorContains(ts, vc, []string{"3", "4"}, t) +} + +func TestWorkingGTE(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) + checkIteratorContains(ts, vc, []string{"2", "3", "4"}, t) +} + +func TestVCICheck(t *testing.T) { + ts := SimpleValueTripleStore() + fixed := BuildFixedIterator() + vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) + if vc.Check(1) { + t.Error("1 is less than 2, should be GTE") + } + if !vc.Check(2) { + t.Error("2 is GTE 2") + } + if !vc.Check(3) { + t.Error("3 is GTE 2") + } + if vc.Check(5) { + t.Error("5 is not in the underlying iterator") + } +} diff --git a/gremlin/gremlin-build-iterator.go b/gremlin/gremlin-build-iterator.go new file mode 100644 index 0000000..b6e9a45 --- /dev/null +++ b/gremlin/gremlin-build-iterator.go @@ -0,0 +1,315 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gremlin + +import ( + "strconv" + + "github.com/barakmich/glog" + "github.com/robertkrimen/otto" + + "github.com/google/cayley/graph" +) + +func getStrings(obj *otto.Object, field string) []string { + strings := make([]string, 0) + val, _ := obj.Get(field) + if !val.IsUndefined() { + export, _ := val.Export() + array := export.([]interface{}) + for _, arg := range array { + strings = append(strings, arg.(string)) + } + } + return strings +} + +func getStringArgs(obj *otto.Object) []string { return getStrings(obj, "string_args") } + +func buildIteratorTree(obj *otto.Object, ts graph.TripleStore) graph.Iterator { + if !isVertexChain(obj) { + return graph.NewNullIterator() + } + return buildIteratorTreeHelper(obj, ts, graph.NewNullIterator()) +} + +func makeListOfStringsFromArrayValue(obj *otto.Object) []string { + var output []string + lengthValue, _ := obj.Get("length") + length, _ := lengthValue.ToInteger() + ulength := uint32(length) + for index := uint32(0); index < ulength; index += 1 { + name := strconv.FormatInt(int64(index), 10) + value, err := obj.Get(name) + if err != nil { + continue + } + if !value.IsString() { + continue + } + s, _ := value.ToString() + output = append(output, s) + } + return output +} + +func buildIteratorFromValue(val otto.Value, ts graph.TripleStore) graph.Iterator { + if val.IsNull() || val.IsUndefined() { + return ts.GetNodesAllIterator() + } + if val.IsPrimitive() { 
+ thing, _ := val.Export() + switch v := thing.(type) { + case string: + it := ts.MakeFixed() + it.AddValue(ts.GetIdFor(v)) + return it + default: + glog.Errorln("Trying to build unknown primitive value.") + } + } + switch val.Class() { + case "Object": + return buildIteratorTree(val.Object(), ts) + case "Array": + // Had better be an array of strings + strings := makeListOfStringsFromArrayValue(val.Object()) + it := ts.MakeFixed() + for _, x := range strings { + it.AddValue(ts.GetIdFor(x)) + } + return it + case "Number": + fallthrough + case "Boolean": + fallthrough + case "Date": + fallthrough + case "String": + it := ts.MakeFixed() + str, _ := val.ToString() + it.AddValue(ts.GetIdFor(str)) + return it + default: + glog.Errorln("Trying to handle unsupported Javascript value.") + return graph.NewNullIterator() + } +} + +func buildInOutIterator(obj *otto.Object, ts graph.TripleStore, base graph.Iterator, isReverse bool) graph.Iterator { + argList, _ := obj.Get("_gremlin_values") + if argList.Class() != "GoArray" { + glog.Errorln("How is arglist not an array? 
Return nothing.", argList.Class()) + return graph.NewNullIterator() + } + argArray := argList.Object() + lengthVal, _ := argArray.Get("length") + length, _ := lengthVal.ToInteger() + var predicateNodeIterator graph.Iterator + if length == 0 { + predicateNodeIterator = ts.GetNodesAllIterator() + } else { + zero, _ := argArray.Get("0") + predicateNodeIterator = buildIteratorFromValue(zero, ts) + } + if length >= 2 { + var tags []string + one, _ := argArray.Get("1") + if one.IsString() { + s, _ := one.ToString() + tags = append(tags, s) + } else if one.Class() == "Array" { + tags = makeListOfStringsFromArrayValue(one.Object()) + } + for _, tag := range tags { + predicateNodeIterator.AddTag(tag) + } + } + + in, out := "s", "o" + if isReverse { + in, out = out, in + } + lto := graph.NewLinksToIterator(ts, base, in) + and := graph.NewAndIterator() + and.AddSubIterator(graph.NewLinksToIterator(ts, predicateNodeIterator, "p")) + and.AddSubIterator(lto) + return graph.NewHasaIterator(ts, and, out) +} + +func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph.Iterator) graph.Iterator { + var it graph.Iterator + it = base + // TODO: Better error handling + kindVal, _ := obj.Get("_gremlin_type") + stringArgs := getStringArgs(obj) + var subIt graph.Iterator + prevVal, _ := obj.Get("_gremlin_prev") + if !prevVal.IsObject() { + subIt = base + } else { + subIt = buildIteratorTreeHelper(prevVal.Object(), ts, base) + } + + kind, _ := kindVal.ToString() + switch kind { + case "vertex": + if len(stringArgs) == 0 { + it = ts.GetNodesAllIterator() + } else { + fixed := ts.MakeFixed() + for _, name := range stringArgs { + fixed.AddValue(ts.GetIdFor(name)) + } + it = fixed + } + case "tag": + it = subIt + for _, tag := range stringArgs { + it.AddTag(tag) + } + case "save": + all := ts.GetNodesAllIterator() + if len(stringArgs) > 2 || len(stringArgs) == 0 { + return graph.NewNullIterator() + } + if len(stringArgs) == 2 { + all.AddTag(stringArgs[1]) + } else { + 
all.AddTag(stringArgs[0]) + } + predFixed := ts.MakeFixed() + predFixed.AddValue(ts.GetIdFor(stringArgs[0])) + subAnd := graph.NewAndIterator() + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, "o")) + hasa := graph.NewHasaIterator(ts, subAnd, "s") + and := graph.NewAndIterator() + and.AddSubIterator(hasa) + and.AddSubIterator(subIt) + it = and + case "saver": + all := ts.GetNodesAllIterator() + if len(stringArgs) > 2 || len(stringArgs) == 0 { + return graph.NewNullIterator() + } + if len(stringArgs) == 2 { + all.AddTag(stringArgs[1]) + } else { + all.AddTag(stringArgs[0]) + } + predFixed := ts.MakeFixed() + predFixed.AddValue(ts.GetIdFor(stringArgs[0])) + subAnd := graph.NewAndIterator() + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, "s")) + hasa := graph.NewHasaIterator(ts, subAnd, "o") + and := graph.NewAndIterator() + and.AddSubIterator(hasa) + and.AddSubIterator(subIt) + it = and + case "has": + fixed := ts.MakeFixed() + if len(stringArgs) < 2 { + return graph.NewNullIterator() + } + for _, name := range stringArgs[1:] { + fixed.AddValue(ts.GetIdFor(name)) + } + predFixed := ts.MakeFixed() + predFixed.AddValue(ts.GetIdFor(stringArgs[0])) + subAnd := graph.NewAndIterator() + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) + subAnd.AddSubIterator(graph.NewLinksToIterator(ts, fixed, "o")) + hasa := graph.NewHasaIterator(ts, subAnd, "s") + and := graph.NewAndIterator() + and.AddSubIterator(hasa) + and.AddSubIterator(subIt) + it = and + case "morphism": + it = base + case "and": + arg, _ := obj.Get("_gremlin_values") + firstArg, _ := arg.Object().Get("0") + if !isVertexChain(firstArg.Object()) { + return graph.NewNullIterator() + } + argIt := buildIteratorTree(firstArg.Object(), ts) + + and := graph.NewAndIterator() + and.AddSubIterator(subIt) + and.AddSubIterator(argIt) + it = and + case 
"back": + arg, _ := obj.Get("_gremlin_back_chain") + argIt := buildIteratorTree(arg.Object(), ts) + and := graph.NewAndIterator() + and.AddSubIterator(subIt) + and.AddSubIterator(argIt) + it = and + case "is": + fixed := ts.MakeFixed() + for _, name := range stringArgs { + fixed.AddValue(ts.GetIdFor(name)) + } + and := graph.NewAndIterator() + and.AddSubIterator(fixed) + and.AddSubIterator(subIt) + it = and + case "or": + arg, _ := obj.Get("_gremlin_values") + firstArg, _ := arg.Object().Get("0") + if !isVertexChain(firstArg.Object()) { + return graph.NewNullIterator() + } + argIt := buildIteratorTree(firstArg.Object(), ts) + + or := graph.NewOrIterator() + or.AddSubIterator(subIt) + or.AddSubIterator(argIt) + it = or + case "both": + // Hardly the most efficient pattern, but the most general. + // Worth looking into an Optimize() optimization here. + clone := subIt.Clone() + it1 := buildInOutIterator(obj, ts, subIt, false) + it2 := buildInOutIterator(obj, ts, clone, true) + + or := graph.NewOrIterator() + or.AddSubIterator(it1) + or.AddSubIterator(it2) + it = or + case "out": + it = buildInOutIterator(obj, ts, subIt, false) + case "follow": + // Follow a morphism + arg, _ := obj.Get("_gremlin_values") + firstArg, _ := arg.Object().Get("0") + if isVertexChain(firstArg.Object()) { + return graph.NewNullIterator() + } + it = buildIteratorTreeHelper(firstArg.Object(), ts, subIt) + case "followr": + // Follow a morphism + arg, _ := obj.Get("_gremlin_followr") + if isVertexChain(arg.Object()) { + return graph.NewNullIterator() + } + it = buildIteratorTreeHelper(arg.Object(), ts, subIt) + case "in": + it = buildInOutIterator(obj, ts, subIt, true) + } + return it +} diff --git a/gremlin/gremlin-env.go b/gremlin/gremlin-env.go new file mode 100644 index 0000000..4e7f332 --- /dev/null +++ b/gremlin/gremlin-env.go @@ -0,0 +1,95 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gremlin + +// Builds a new Gremlin environment pointing at a session. + +import ( + "github.com/barakmich/glog" + "github.com/robertkrimen/otto" +) + +func BuildGremlinEnv(ses *GremlinSession) *otto.Otto { + env := otto.New() + setupGremlin(env, ses) + return env +} + +func concatStringArgs(call otto.FunctionCall) *[]interface{} { + outStrings := make([]interface{}, 0) + for _, arg := range call.ArgumentList { + if arg.IsString() { + outStrings = append(outStrings, arg.String()) + } + if arg.IsObject() && arg.Class() == "Array" { + obj, _ := arg.Export() + for _, x := range obj.([]interface{}) { + outStrings = append(outStrings, x.(string)) + } + } + } + return &outStrings +} + +func isVertexChain(obj *otto.Object) bool { + val, _ := obj.Get("_gremlin_type") + if x, _ := val.ToString(); x == "vertex" { + return true + } + val, _ = obj.Get("_gremlin_prev") + if val.IsObject() { + return isVertexChain(val.Object()) + } + return false +} + +func setupGremlin(env *otto.Otto, ses *GremlinSession) { + graph, _ := env.Object("graph = {}") + graph.Set("Vertex", func(call otto.FunctionCall) otto.Value { + call.Otto.Run("var out = {}") + out, err := call.Otto.Object("out") + if err != nil { + glog.Error(err.Error()) + return otto.TrueValue() + } + out.Set("_gremlin_type", "vertex") + outStrings := concatStringArgs(call) + if len(*outStrings) > 0 { + out.Set("string_args", *outStrings) + } + 
embedTraversals(env, ses, out) + embedFinals(env, ses, out) + return out.Value() + }) + + graph.Set("Morphism", func(call otto.FunctionCall) otto.Value { + call.Otto.Run("var out = {}") + out, _ := call.Otto.Object("out") + out.Set("_gremlin_type", "morphism") + embedTraversals(env, ses, out) + return out.Value() + }) + graph.Set("Emit", func(call otto.FunctionCall) otto.Value { + value := call.Argument(0) + if value.IsDefined() { + ses.SendResult(&GremlinResult{metaresult: false, err: "", val: &value, actualResults: nil}) + } + return otto.NullValue() + }) + env.Run("graph.V = graph.Vertex") + env.Run("graph.M = graph.Morphism") + env.Run("g = graph") + +} diff --git a/gremlin/gremlin-finals.go b/gremlin/gremlin-finals.go new file mode 100644 index 0000000..f8c978d --- /dev/null +++ b/gremlin/gremlin-finals.go @@ -0,0 +1,274 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package gremlin + +import ( + "github.com/barakmich/glog" + "github.com/robertkrimen/otto" + + "github.com/google/cayley/graph" +) + +const GremlinTopResultTag = "id" + +func embedFinals(env *otto.Otto, ses *GremlinSession, obj *otto.Object) { + obj.Set("All", allFunc(env, ses, obj)) + obj.Set("GetLimit", limitFunc(env, ses, obj)) + obj.Set("ToArray", toArrayFunc(env, ses, obj, false)) + obj.Set("ToValue", toValueFunc(env, ses, obj, false)) + obj.Set("TagArray", toArrayFunc(env, ses, obj, true)) + obj.Set("TagValue", toValueFunc(env, ses, obj, true)) + obj.Set("Map", mapFunc(env, ses, obj)) + obj.Set("ForEach", mapFunc(env, ses, obj)) +} + +func allFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + it := buildIteratorTree(obj, ses.ts) + it.AddTag(GremlinTopResultTag) + ses.limit = -1 + ses.count = 0 + runIteratorOnSession(it, ses) + return otto.NullValue() + } +} + +func limitFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + if len(call.ArgumentList) > 0 { + limitVal, _ := call.Argument(0).ToInteger() + it := buildIteratorTree(obj, ses.ts) + it.AddTag(GremlinTopResultTag) + ses.limit = int(limitVal) + ses.count = 0 + runIteratorOnSession(it, ses) + } + return otto.NullValue() + } +} + +func toArrayFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object, withTags bool) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + it := buildIteratorTree(obj, ses.ts) + it.AddTag(GremlinTopResultTag) + limit := -1 + if len(call.ArgumentList) > 0 { + limitParsed, _ := call.Argument(0).ToInteger() + limit = int(limitParsed) + } + var val otto.Value + var err error + if !withTags { + array := runIteratorToArrayNoTags(it, ses, limit) + val, err = call.Otto.ToValue(array) + } else { + array := runIteratorToArray(it, ses, limit) + val, err = 
call.Otto.ToValue(array) + } + + if err != nil { + glog.Error(err) + return otto.NullValue() + } + return val + } +} + +func toValueFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object, withTags bool) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + it := buildIteratorTree(obj, ses.ts) + it.AddTag(GremlinTopResultTag) + limit := 1 + var val otto.Value + var err error + if !withTags { + array := runIteratorToArrayNoTags(it, ses, limit) + if len(array) < 1 { + return otto.NullValue() + } + val, err = call.Otto.ToValue(array[0]) + } else { + array := runIteratorToArray(it, ses, limit) + if len(array) < 1 { + return otto.NullValue() + } + val, err = call.Otto.ToValue(array[0]) + } + if err != nil { + glog.Error(err) + return otto.NullValue() + } else { + return val + } + + } +} + +func mapFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + it := buildIteratorTree(obj, ses.ts) + it.AddTag(GremlinTopResultTag) + limit := -1 + if len(call.ArgumentList) == 0 { + return otto.NullValue() + } + callback := call.Argument(len(call.ArgumentList) - 1) + if len(call.ArgumentList) > 1 { + limitParsed, _ := call.Argument(0).ToInteger() + limit = int(limitParsed) + } + runIteratorWithCallback(it, ses, callback, call, limit) + return otto.NullValue() + } +} + +func tagsToValueMap(m map[string]graph.TSVal, ses *GremlinSession) map[string]string { + outputMap := make(map[string]string) + for k, v := range m { + outputMap[k] = ses.ts.GetNameFor(v) + } + return outputMap +} + +func runIteratorToArray(it graph.Iterator, ses *GremlinSession, limit int) []map[string]string { + output := make([]map[string]string, 0) + count := 0 + it, _ = it.Optimize() + for { + if ses.doHalt { + return nil + } + _, ok := it.Next() + if !ok { + break + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + output = append(output, tagsToValueMap(tags, ses)) 
+ count++ + if limit >= 0 && count >= limit { + break + } + for it.NextResult() == true { + if ses.doHalt { + return nil + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + output = append(output, tagsToValueMap(tags, ses)) + count++ + if limit >= 0 && count >= limit { + break + } + } + } + it.Close() + return output +} + +func runIteratorToArrayNoTags(it graph.Iterator, ses *GremlinSession, limit int) []string { + output := make([]string, 0) + count := 0 + it, _ = it.Optimize() + for { + if ses.doHalt { + return nil + } + val, ok := it.Next() + if !ok { + break + } + output = append(output, ses.ts.GetNameFor(val)) + count++ + if limit >= 0 && count >= limit { + break + } + } + it.Close() + return output +} + +func runIteratorWithCallback(it graph.Iterator, ses *GremlinSession, callback otto.Value, this otto.FunctionCall, limit int) { + count := 0 + it, _ = it.Optimize() + for { + if ses.doHalt { + return + } + _, ok := it.Next() + if !ok { + break + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + val, _ := this.Otto.ToValue(tagsToValueMap(tags, ses)) + val, _ = callback.Call(this.This, val) + count++ + if limit >= 0 && count >= limit { + break + } + for it.NextResult() == true { + if ses.doHalt { + return + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + val, _ := this.Otto.ToValue(tagsToValueMap(tags, ses)) + val, _ = callback.Call(this.This, val) + count++ + if limit >= 0 && count >= limit { + break + } + } + } + it.Close() +} + +func runIteratorOnSession(it graph.Iterator, ses *GremlinSession) { + if ses.lookingForQueryShape { + graph.OutputQueryShapeForIterator(it, ses.ts, &(ses.queryShape)) + return + } + it, _ = it.Optimize() + glog.V(2).Infoln(it.DebugString(0)) + for { + // TODO(barakmich): Better halting. 
+ if ses.doHalt { + return + } + _, ok := it.Next() + if !ok { + break + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + cont := ses.SendResult(&GremlinResult{metaresult: false, err: "", val: nil, actualResults: &tags}) + if !cont { + break + } + for it.NextResult() == true { + if ses.doHalt { + return + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + cont := ses.SendResult(&GremlinResult{metaresult: false, err: "", val: nil, actualResults: &tags}) + if !cont { + break + } + } + } + it.Close() +} diff --git a/gremlin/gremlin-functional_test.go b/gremlin/gremlin-functional_test.go new file mode 100644 index 0000000..f6c65fb --- /dev/null +++ b/gremlin/gremlin-functional_test.go @@ -0,0 +1,230 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gremlin + +import ( + "sort" + "testing" + + . 
"github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph/memstore" +) + +// +---+ +---+ +// | A |------- ->| F |<-- +// +---+ \------>+---+-/ +---+ \--+---+ +// ------>|#B#| | | E | +// +---+-------/ >+---+ | +---+ +// | C | / v +// +---+ -/ +---+ +// ---- +---+/ |#G#| +// \-->|#D#|------------->+---+ +// +---+ +// + +func buildTripleStore() *GremlinSession { + ts := memstore.MakeTestingMemstore() + return NewGremlinSession(ts, -1, false) +} + +func shouldBeUnordered(actual interface{}, expected ...interface{}) string { + if len(expected) != 1 { + return "Only one list supported" + } + actualStr := actual.([]string) + expectedStr := expected[0].([]string) + sort.Strings(actualStr) + sort.Strings(expectedStr) + return ShouldResemble(actualStr, expectedStr) +} + +func runQueryGetTag(query string, tag string) ([]string, int) { + js := buildTripleStore() + output := make([]string, 0) + c := make(chan interface{}, 5) + js.ExecInput(query, c, -1) + count := 0 + for result := range c { + count++ + data := result.(*GremlinResult) + if data.val == nil { + val := (*data.actualResults)[tag] + if val != nil { + output = append(output, js.ts.GetNameFor(val)) + } + } + } + return output, count +} + +func ConveyQuery(doc string, query string, expected []string) { + ConveyQueryTag(doc, query, GremlinTopResultTag, expected) +} + +func ConveyQueryTag(doc string, query string, tag string, expected []string) { + Convey(doc, func() { + actual, _ := runQueryGetTag(query, tag) + So(actual, shouldBeUnordered, expected) + }) +} + +func TestGremlin(t *testing.T) { + Convey("With a default memtriplestore", t, func() { + + ConveyQuery("Can get a single vertex", + `g.V("A").All()`, + []string{"A"}) + + ConveyQuery("Can use .Out()", + `g.V("A").Out("follows").All()`, + []string{"B"}) + + ConveyQuery("Can use .In()", + `g.V("B").In("follows").All()`, + []string{"A", "C", "D"}) + + ConveyQuery("Can use .Both()", + `g.V("F").Both("follows").All()`, + []string{"B", "G", "E"}) 
+ + ConveyQuery("Can use .Tag()-.Is()-.Back()", + `g.V("B").In("follows").Tag("foo").Out("status").Is("cool").Back("foo").All()`, + []string{"D"}) + + ConveyQuery("Can separate .Tag()-.Is()-.Back()", + ` + x = g.V("C").Out("follows").Tag("foo").Out("status").Is("cool").Back("foo") + x.In("follows").Is("D").Back("foo").All() + `, + []string{"B"}) + + Convey("Can do multiple .Back()s", func() { + query := ` + g.V("E").Out("follows").As("f").Out("follows").Out("status").Is("cool").Back("f").In("follows").In("follows").As("acd").Out("status").Is("cool").Back("f").All() + ` + expected := []string{"D"} + actual, _ := runQueryGetTag(query, "acd") + So(actual, shouldBeUnordered, expected) + }) + + }) +} + +func TestGremlinMorphism(t *testing.T) { + Convey("With a default memtriplestore", t, func() { + + ConveyQuery("Simple morphism works", + ` + grandfollows = g.M().Out("follows").Out("follows") + g.V("C").Follow(grandfollows).All() + `, + []string{"G", "F", "B"}) + + ConveyQuery("Reverse morphism works", + ` + grandfollows = g.M().Out("follows").Out("follows") + g.V("F").FollowR(grandfollows).All() + `, []string{"A", "C", "D"}) + + }) +} + +func TestGremlinIntersection(t *testing.T) { + Convey("With a default memtriplestore", t, func() { + ConveyQuery("Simple intersection", + ` + function follows(x) { return g.V(x).Out("follows") } + + follows("D").And(follows("C")).All() + `, []string{"B"}) + + ConveyQuery("Simple Morphism Intersection", + ` + grandfollows = g.M().Out("follows").Out("follows") + function gfollows(x) { return g.V(x).Follow(grandfollows) } + + gfollows("A").And(gfollows("C")).All() + `, []string{"F"}) + + ConveyQuery("Double Morphism Intersection", + ` + grandfollows = g.M().Out("follows").Out("follows") + function gfollows(x) { return g.V(x).Follow(grandfollows) } + + gfollows("E").And(gfollows("C")).And(gfollows("B")).All() + `, []string{"G"}) + + ConveyQuery("Reverse Intersection", + ` + grandfollows = g.M().Out("follows").Out("follows") + + 
g.V("G").FollowR(grandfollows).Intersect(g.V("F").FollowR(grandfollows)).All() + `, []string{"C"}) + + ConveyQuery("Standard sort of morphism intersection, continue follow", + ` + gfollowers = g.M().In("follows").In("follows") + function cool(x) { return g.V(x).As("a").Out("status").Is("cool").Back("a") } + cool("G").Follow(gfollowers).Intersect(cool("B").Follow(gfollowers)).All() + `, []string{"C"}) + + }) +} + +func TestGremlinHas(t *testing.T) { + Convey("With a default memtriplestore", t, func() { + ConveyQuery("Test a simple Has", + `g.V().Has("status", "cool").All()`, + []string{"G", "D", "B"}) + + ConveyQuery("Test a double Has", + `g.V().Has("status", "cool").Has("follows", "F").All()`, + []string{"B"}) + + }) +} + +func TestGremlinTag(t *testing.T) { + Convey("With a default memtriplestore", t, func() { + ConveyQueryTag("Test a simple save", + `g.V().Save("status", "somecool").All()`, + "somecool", + []string{"cool", "cool", "cool"}) + + ConveyQueryTag("Test a simple saveR", + `g.V("cool").SaveR("status", "who").All()`, + "who", + []string{"G", "D", "B"}) + + ConveyQueryTag("Test an out save", + `g.V("D").Out(null, "pred").All()`, + "pred", + []string{"follows", "follows", "status"}) + + ConveyQueryTag("Test a tag list", + `g.V("D").Out(null, ["pred", "foo", "bar"]).All()`, + "foo", + []string{"follows", "follows", "status"}) + + ConveyQuery("Test a pred list", + `g.V("D").Out(["follows", "status"]).All()`, + []string{"B", "G", "cool"}) + + ConveyQuery("Test a predicate path", + `g.V("D").Out(g.V("follows"), "pred").All()`, + []string{"B", "G"}) + }) +} diff --git a/gremlin/gremlin-session.go b/gremlin/gremlin-session.go new file mode 100644 index 0000000..a0b0483 --- /dev/null +++ b/gremlin/gremlin-session.go @@ -0,0 +1,266 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gremlin + +import ( + "errors" + "fmt" + "sort" + "time" + + "github.com/robertkrimen/otto" + + "github.com/google/cayley/graph" +) + +type GremlinSession struct { + ts graph.TripleStore + currentChannel chan interface{} + env *otto.Otto + debug bool + limit int + count int + dataOutput []interface{} + lookingForQueryShape bool + queryShape map[string]interface{} + err error + script *otto.Script + doHalt bool + timeoutSec time.Duration + emptyEnv *otto.Otto +} + +func NewGremlinSession(inputTripleStore graph.TripleStore, timeoutSec int, persist bool) *GremlinSession { + var g GremlinSession + g.ts = inputTripleStore + g.env = BuildGremlinEnv(&g) + g.limit = -1 + g.count = 0 + g.lookingForQueryShape = false + if persist { + g.emptyEnv = g.env + } + if timeoutSec < 0 { + g.timeoutSec = time.Duration(-1) + } else { + g.timeoutSec = time.Duration(timeoutSec) + } + g.ClearJson() + return &g +} + +type GremlinResult struct { + metaresult bool + err string + val *otto.Value + actualResults *map[string]graph.TSVal +} + +func (g *GremlinSession) ToggleDebug() { + g.debug = !g.debug +} + +func (g *GremlinSession) GetQuery(input string, output_struct chan map[string]interface{}) { + defer close(output_struct) + g.queryShape = make(map[string]interface{}) + g.lookingForQueryShape = true + g.env.Run(input) + output_struct <- g.queryShape + g.queryShape = nil +} + +func (g *GremlinSession) InputParses(input string) (graph.ParseResult, error) { + script, err := g.env.Compile("", input) + if err != nil { + return graph.ParseFail, err + } + g.script = 
script + return graph.Parsed, nil +} + +func (g *GremlinSession) SendResult(result *GremlinResult) bool { + if g.limit >= 0 && g.limit == g.count { + return false + } + if g.doHalt { + return false + } + if g.currentChannel != nil { + g.currentChannel <- result + g.count++ + if g.limit >= 0 && g.limit == g.count { + return false + } else { + return true + } + } + return false +} + +var halt = errors.New("Query Timeout") + +func (g *GremlinSession) runUnsafe(input interface{}) (otto.Value, error) { + g.doHalt = false + defer func() { + if caught := recover(); caught != nil { + if caught == halt { + g.err = halt + return + } + panic(caught) // Something else happened, repanic! + } + }() + + g.env.Interrupt = make(chan func(), 1) // The buffer prevents blocking + + if g.timeoutSec != -1 { + go func() { + time.Sleep(g.timeoutSec * time.Second) // Stop after two seconds + g.doHalt = true + if g.env != nil { + g.env.Interrupt <- func() { + panic(halt) + } + g.env = g.emptyEnv + } + }() + } + + return g.env.Run(input) // Here be dragons (risky code) +} + +func (g *GremlinSession) ExecInput(input string, out chan interface{}, limit int) { + defer close(out) + g.err = nil + g.currentChannel = out + var err error + var value otto.Value + if g.script == nil { + value, err = g.runUnsafe(input) + } else { + value, err = g.runUnsafe(g.script) + } + if err != nil { + out <- &GremlinResult{metaresult: true, + err: err.Error(), + val: &value, + actualResults: nil} + } else { + out <- &GremlinResult{metaresult: true, + err: "", + val: &value, + actualResults: nil} + } + g.currentChannel = nil + g.script = nil + g.env = g.emptyEnv + return +} + +func (s *GremlinSession) ToText(result interface{}) string { + data := result.(*GremlinResult) + if data.metaresult { + if data.err != "" { + return fmt.Sprintln("Error: ", data.err) + } + if data.val != nil { + s, _ := data.val.Export() + if data.val.IsObject() { + typeVal, _ := data.val.Object().Get("_gremlin_type") + if 
!typeVal.IsUndefined() { + s = "[internal Iterator]" + } + } + return fmt.Sprintln("=>", s) + } + return "" + } + var out string + out = fmt.Sprintln("****") + if data.val == nil { + tags := data.actualResults + tagKeys := make([]string, len(*tags)) + i := 0 + for k, _ := range *tags { + tagKeys[i] = k + i++ + } + sort.Strings(tagKeys) + for _, k := range tagKeys { + if k == "$_" { + continue + } + out += fmt.Sprintf("%s : %s\n", k, s.ts.GetNameFor((*tags)[k])) + } + } else { + if data.val.IsObject() { + export, _ := data.val.Export() + mapExport := export.(map[string]string) + for k, v := range mapExport { + out += fmt.Sprintf("%s : %v\n", k, v) + } + } else { + strVersion, _ := data.val.ToString() + out += fmt.Sprintf("%s\n", strVersion) + } + } + return out +} + +// Web stuff +func (ses *GremlinSession) BuildJson(result interface{}) { + data := result.(*GremlinResult) + if !data.metaresult { + if data.val == nil { + obj := make(map[string]string) + tags := data.actualResults + tagKeys := make([]string, len(*tags)) + i := 0 + for k, _ := range *tags { + tagKeys[i] = k + i++ + } + sort.Strings(tagKeys) + for _, k := range tagKeys { + obj[k] = ses.ts.GetNameFor((*tags)[k]) + } + ses.dataOutput = append(ses.dataOutput, obj) + } else { + if data.val.IsObject() { + export, _ := data.val.Export() + ses.dataOutput = append(ses.dataOutput, export) + } else { + strVersion, _ := data.val.ToString() + ses.dataOutput = append(ses.dataOutput, strVersion) + } + } + } + +} + +func (ses *GremlinSession) GetJson() (interface{}, error) { + defer ses.ClearJson() + if ses.err != nil { + return nil, ses.err + } + if ses.doHalt { + return nil, halt + } + return ses.dataOutput, nil +} + +func (ses *GremlinSession) ClearJson() { + ses.dataOutput = nil +} diff --git a/gremlin/gremlin-traversals.go b/gremlin/gremlin-traversals.go new file mode 100644 index 0000000..c0f4704 --- /dev/null +++ b/gremlin/gremlin-traversals.go @@ -0,0 +1,184 @@ +// Copyright 2014 The Cayley Authors. 
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gremlin + +// Adds special traversal functions to JS Gremlin objects. Most of these just build the chain of objects, and won't often need the session. + +import ( + "github.com/barakmich/glog" + "github.com/robertkrimen/otto" +) + +func embedTraversals(env *otto.Otto, ses *GremlinSession, obj *otto.Object) { + obj.Set("In", gremlinFunc("in", obj, env, ses)) + obj.Set("Out", gremlinFunc("out", obj, env, ses)) + obj.Set("Is", gremlinFunc("is", obj, env, ses)) + obj.Set("Both", gremlinFunc("both", obj, env, ses)) + obj.Set("Follow", gremlinFunc("follow", obj, env, ses)) + obj.Set("FollowR", gremlinFollowR("followr", obj, env, ses)) + obj.Set("And", gremlinFunc("and", obj, env, ses)) + obj.Set("Intersect", gremlinFunc("and", obj, env, ses)) + obj.Set("Union", gremlinFunc("or", obj, env, ses)) + obj.Set("Or", gremlinFunc("or", obj, env, ses)) + obj.Set("Back", gremlinBack("back", obj, env, ses)) + obj.Set("Tag", gremlinFunc("tag", obj, env, ses)) + obj.Set("As", gremlinFunc("tag", obj, env, ses)) + obj.Set("Has", gremlinFunc("has", obj, env, ses)) + obj.Set("Save", gremlinFunc("save", obj, env, ses)) + obj.Set("SaveR", gremlinFunc("saver", obj, env, ses)) +} + +func gremlinFunc(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + call.Otto.Run("var out = {}") + out, _ := 
call.Otto.Object("out") + out.Set("_gremlin_type", kind) + out.Set("_gremlin_values", call.ArgumentList) + out.Set("_gremlin_prev", prevObj) + outStrings := concatStringArgs(call) + if len(*outStrings) > 0 { + out.Set("string_args", *outStrings) + } + embedTraversals(env, ses, out) + if isVertexChain(call.This.Object()) { + embedFinals(env, ses, out) + } + return out.Value() + } +} + +func gremlinBack(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + call.Otto.Run("var out = {}") + out, _ := call.Otto.Object("out") + out.Set("_gremlin_type", kind) + out.Set("_gremlin_values", call.ArgumentList) + outStrings := concatStringArgs(call) + if len(*outStrings) > 0 { + out.Set("string_args", *outStrings) + } + var otherChain *otto.Object + var thisObj *otto.Object + if len(*outStrings) != 0 { + otherChain, thisObj = reverseGremlinChainTo(call.Otto, prevObj, (*outStrings)[0].(string)) + } else { + otherChain, thisObj = reverseGremlinChainTo(call.Otto, prevObj, "") + } + out.Set("_gremlin_prev", thisObj) + out.Set("_gremlin_back_chain", otherChain) + embedTraversals(env, ses, out) + if isVertexChain(call.This.Object()) { + embedFinals(env, ses, out) + } + return out.Value() + + } +} + +func gremlinFollowR(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) otto.Value { + return func(call otto.FunctionCall) otto.Value { + call.Otto.Run("var out = {}") + out, _ := call.Otto.Object("out") + out.Set("_gremlin_type", kind) + out.Set("_gremlin_values", call.ArgumentList) + outStrings := concatStringArgs(call) + if len(*outStrings) > 0 { + out.Set("string_args", *outStrings) + } + if len(call.ArgumentList) == 0 { + return prevObj.Value() + } + arg := call.Argument(0) + if isVertexChain(arg.Object()) { + return prevObj.Value() + } + newChain, _ := reverseGremlinChainTo(call.Otto, arg.Object(), "") + out.Set("_gremlin_prev", 
prevObj) + out.Set("_gremlin_followr", newChain) + embedTraversals(env, ses, out) + if isVertexChain(call.This.Object()) { + embedFinals(env, ses, out) + } + return out.Value() + + } +} + +func reverseGremlinChainTo(env *otto.Otto, prevObj *otto.Object, tag string) (*otto.Object, *otto.Object) { + env.Run("var _base_object = {}") + base, err := env.Object("_base_object") + if err != nil { + glog.Error(err) + return otto.NullValue().Object(), otto.NullValue().Object() + } + if isVertexChain(prevObj) { + base.Set("_gremlin_type", "vertex") + } else { + base.Set("_gremlin_type", "morphism") + } + return reverseGremlinChainHelper(env, prevObj, base, tag) +} + +func reverseGremlinChainHelper(env *otto.Otto, chain *otto.Object, newBase *otto.Object, tag string) (*otto.Object, *otto.Object) { + kindVal, _ := chain.Get("_gremlin_type") + kind, _ := kindVal.ToString() + + if tag != "" { + if kind == "tag" { + tags := getStringArgs(chain) + for _, t := range tags { + if t == tag { + return newBase, chain + } + } + } + } + + if kind == "morphism" || kind == "vertex" { + return newBase, chain + } + var newKind string + switch kind { + case "in": + newKind = "out" + case "out": + newKind = "in" + default: + newKind = kind + } + prev, _ := chain.Get("_gremlin_prev") + env.Run("var out = {}") + out, _ := env.Object("out") + out.Set("_gremlin_type", newKind) + values, _ := chain.Get("_gremlin_values") + out.Set("_gremlin_values", values) + back, _ := chain.Get("_gremlin_back_chain") + out.Set("_gremlin_back_chain", back) + out.Set("_gremlin_prev", newBase) + strings, _ := chain.Get("string_args") + out.Set("string_args", strings) + return reverseGremlinChainHelper(env, prev.Object(), out, tag) +} + +func debugChain(obj *otto.Object) bool { + val, _ := obj.Get("_gremlin_type") + x, _ := val.ToString() + glog.V(2).Infoln(x) + val, _ = obj.Get("_gremlin_prev") + if val.IsObject() { + return debugChain(val.Object()) + } + return false +} diff --git a/gremlin/gremlin_test.nt 
b/gremlin/gremlin_test.nt new file mode 100644 index 0000000..3febca3 --- /dev/null +++ b/gremlin/gremlin_test.nt @@ -0,0 +1,11 @@ +A follows B . +C follows B . +C follows D . +D follows B . +B follows F . +F follows G . +D follows G . +E follows F . +B status cool . +D status cool . +G status cool . diff --git a/http/cayley-http-docs.go b/http/cayley-http-docs.go new file mode 100644 index 0000000..45e747d --- /dev/null +++ b/http/cayley-http-docs.go @@ -0,0 +1,73 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package http + +import ( + "fmt" + "io/ioutil" + "net/http" + "os" + + "github.com/julienschmidt/httprouter" + "github.com/russross/blackfriday" +) + +type DocRequestHandler struct { +} + +func MarkdownWithCSS(input []byte, title string) []byte { + // set up the HTML renderer + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_USE_XHTML + htmlFlags |= blackfriday.HTML_USE_SMARTYPANTS + htmlFlags |= blackfriday.HTML_SMARTYPANTS_FRACTIONS + htmlFlags |= blackfriday.HTML_SMARTYPANTS_LATEX_DASHES + htmlFlags |= blackfriday.HTML_COMPLETE_PAGE + renderer := blackfriday.HtmlRenderer(htmlFlags, title, markdownCSS) + + // set up the parser + extensions := 0 + //extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS + extensions |= blackfriday.EXTENSION_TABLES + extensions |= blackfriday.EXTENSION_FENCED_CODE + extensions |= blackfriday.EXTENSION_AUTOLINK + extensions |= blackfriday.EXTENSION_STRIKETHROUGH + //extensions |= blackfriday.EXTENSION_SPACE_HEADERS + extensions |= blackfriday.EXTENSION_HEADER_IDS + extensions |= blackfriday.EXTENSION_LAX_HTML_BLOCKS + + return blackfriday.Markdown(input, renderer, extensions) +} + +func (h *DocRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, params httprouter.Params) { + docpage := params.ByName("docpage") + if docpage == "" { + docpage = "Index" + } + file, err := os.Open(fmt.Sprintf("docs/%s.md", docpage)) + if err != nil { + http.Error(w, err.Error(), http.StatusNotFound) + return + } + data, err := ioutil.ReadAll(file) + if err != nil { + http.Error(w, err.Error(), http.StatusNoContent) + return + } + output := MarkdownWithCSS(data, fmt.Sprintf("Cayley Docs - %s", docpage)) + fmt.Fprint(w, string(output)) +} + +var markdownCSS = "/static/css/docs.css" diff --git a/http/cayley-http-query.go b/http/cayley-http-query.go new file mode 100644 index 0000000..4a99dd0 --- /dev/null +++ b/http/cayley-http-query.go @@ -0,0 +1,153 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + + "github.com/julienschmidt/httprouter" + + "github.com/google/cayley/graph" + "github.com/google/cayley/gremlin" + "github.com/google/cayley/mql" +) + +type SuccessQueryWrapper struct { + Result interface{} `json:"result"` +} + +type ErrorQueryWrapper struct { + Error string `json:"error"` +} + +func WrapErrResult(err error) ([]byte, error) { + var wrap ErrorQueryWrapper + wrap.Error = err.Error() + return json.MarshalIndent(wrap, "", " ") +} + +func WrapResult(result interface{}) ([]byte, error) { + var wrap SuccessQueryWrapper + wrap.Result = result + return json.MarshalIndent(wrap, "", " ") +} + +func RunJsonQuery(query string, ses graph.HttpSession) (interface{}, error) { + c := make(chan interface{}, 5) + go ses.ExecInput(query, c, 100) + for res := range c { + ses.BuildJson(res) + } + return ses.GetJson() +} + +func GetQueryShape(query string, ses graph.HttpSession) ([]byte, error) { + c := make(chan map[string]interface{}, 5) + go ses.GetQuery(query, c) + var data map[string]interface{} + for res := range c { + data = res + } + return json.Marshal(data) +} + +// TODO(barakmich): Turn this into proper middleware. 
+func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { + var ses graph.HttpSession + switch params.ByName("query_lang") { + case "gremlin": + ses = gremlin.NewGremlinSession(api.ts, api.config.GremlinTimeout, false) + case "mql": + ses = mql.NewMqlSession(api.ts) + default: + return FormatJson400(w, "Need a query language.") + } + var err error + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + return FormatJson400(w, err) + } + code := string(bodyBytes) + result, err := ses.InputParses(code) + switch result { + case graph.Parsed: + var output interface{} + var bytes []byte + var err error + output, err = RunJsonQuery(code, ses) + if err != nil { + bytes, err = WrapErrResult(err) + http.Error(w, string(bytes), 400) + ses = nil + return 400 + } + bytes, err = WrapResult(output) + if err != nil { + ses = nil + return FormatJson400(w, err) + } + fmt.Fprint(w, string(bytes)) + ses = nil + return 200 + case graph.ParseFail: + ses = nil + return FormatJson400(w, err) + default: + ses = nil + return FormatJsonError(w, 500, "Incomplete data?") + } + http.Error(w, "", http.StatusNotFound) + ses = nil + return http.StatusNotFound +} + +func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { + var ses graph.HttpSession + switch params.ByName("query_lang") { + case "gremlin": + ses = gremlin.NewGremlinSession(api.ts, api.config.GremlinTimeout, false) + case "mql": + ses = mql.NewMqlSession(api.ts) + default: + return FormatJson400(w, "Need a query language.") + } + var err error + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + return FormatJson400(w, err) + } + code := string(bodyBytes) + result, err := ses.InputParses(code) + switch result { + case graph.Parsed: + var output []byte + var err error + output, err = GetQueryShape(code, ses) + if err != nil { + return FormatJson400(w, err) + } + fmt.Fprint(w, string(output)) + return 200 + case graph.ParseFail: + 
return FormatJson400(w, err) + default: + return FormatJsonError(w, 500, "Incomplete data?") + } + http.Error(w, "", http.StatusNotFound) + return http.StatusNotFound +} diff --git a/http/cayley-http-write.go b/http/cayley-http-write.go new file mode 100644 index 0000000..20fb60d --- /dev/null +++ b/http/cayley-http-write.go @@ -0,0 +1,119 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net/http" + "strconv" + + "github.com/barakmich/glog" + "github.com/julienschmidt/httprouter" + + "github.com/google/cayley/graph" + "github.com/google/cayley/nquads" +) + +func ParseJsonToTripleList(jsonBody []byte) ([]*graph.Triple, error) { + var tripleList []*graph.Triple + err := json.Unmarshal(jsonBody, &tripleList) + if err != nil { + return nil, err + } + for i, t := range tripleList { + if !t.IsValid() { + return nil, errors.New(fmt.Sprintf("Invalid triple at index %d. 
%s", i, t.ToString())) + } + } + return tripleList, nil +} + +func (api *Api) ServeV1Write(w http.ResponseWriter, r *http.Request, _ httprouter.Params) int { + if api.config.ReadOnly { + return FormatJson400(w, "Database is read-only.") + } + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + return FormatJson400(w, err) + } + tripleList, terr := ParseJsonToTripleList(bodyBytes) + if terr != nil { + return FormatJson400(w, terr) + } + api.ts.AddTripleSet(tripleList) + fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList)) + return 200 +} + +func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { + if api.config.ReadOnly { + return FormatJson400(w, "Database is read-only.") + } + + formFile, _, err := r.FormFile("NQuadFile") + if err != nil { + glog.Errorln(err) + return FormatJsonError(w, 500, "Couldn't read file: "+err.Error()) + } + + defer formFile.Close() + + blockSize, blockErr := strconv.ParseInt(r.URL.Query().Get("block_size"), 10, 64) + if blockErr != nil { + blockSize = int64(api.config.LoadSize) + } + + tChan := make(chan *graph.Triple) + go nquads.ReadNQuadsFromReader(tChan, formFile) + tripleblock := make([]*graph.Triple, blockSize) + nTriples := 0 + i := int64(0) + for t := range tChan { + tripleblock[i] = t + i++ + nTriples++ + if i == blockSize { + api.ts.AddTripleSet(tripleblock) + i = 0 + } + } + api.ts.AddTripleSet(tripleblock[0:i]) + fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", nTriples) + return 200 +} + +func (api *Api) ServeV1Delete(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { + if api.config.ReadOnly { + return FormatJson400(w, "Database is read-only.") + } + bodyBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + return FormatJson400(w, err) + } + tripleList, terr := ParseJsonToTripleList(bodyBytes) + if terr != nil { + return FormatJson400(w, terr) + } + count := 0 + for _, triple := range tripleList 
{ + api.ts.RemoveTriple(triple) + count++ + } + fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count) + return 200 +} diff --git a/http/cayley-http.go b/http/cayley-http.go new file mode 100644 index 0000000..68d50bb --- /dev/null +++ b/http/cayley-http.go @@ -0,0 +1,113 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "fmt" + "html/template" + "net/http" + "time" + + "github.com/barakmich/glog" + "github.com/julienschmidt/httprouter" + + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph" +) + +type ResponseHandler func(http.ResponseWriter, *http.Request, httprouter.Params) int + +func LogRequest(handler ResponseHandler) httprouter.Handle { + return func(w http.ResponseWriter, req *http.Request, params httprouter.Params) { + start := time.Now() + addr := req.Header.Get("X-Real-IP") + if addr == "" { + addr = req.Header.Get("X-Forwarded-For") + if addr == "" { + addr = req.RemoteAddr + } + } + glog.Infof("Started %s %s for %s", req.Method, req.URL.Path, addr) + code := handler(w, req, params) + glog.Infof("Completed %v %s %s in %v", code, http.StatusText(code), req.URL.Path, time.Since(start)) + + } +} + +func FormatJson400(w http.ResponseWriter, err interface{}) int { + return FormatJsonError(w, 400, err) +} + +func FormatJsonError(w http.ResponseWriter, code int, err interface{}) int { + http.Error(w, 
fmt.Sprintf("{\"error\" : \"%s\"}", err), code) + return code +} + +type TemplateRequestHandler struct { + templates *template.Template +} + +func (h *TemplateRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, params httprouter.Params) { + uiType := params.ByName("ui_type") + if r.URL.Path == "/" { + uiType = "query" + } + err := h.templates.ExecuteTemplate(w, uiType+".html", h) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } +} + +type Api struct { + config *cfg.CayleyConfig + ts graph.TripleStore +} + +func (api *Api) ApiV1(r *httprouter.Router) { + r.POST("/api/v1/query/:query_lang", LogRequest(api.ServeV1Query)) + r.POST("/api/v1/shape/:query_lang", LogRequest(api.ServeV1Shape)) + r.POST("/api/v1/write", LogRequest(api.ServeV1Write)) + r.POST("/api/v1/write/file/nquad", LogRequest(api.ServeV1WriteNQuad)) + //TODO(barakmich): /write/text/nquad, which reads from request.body instead of HTML5 file form? + r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete)) +} + +func SetupRoutes(ts graph.TripleStore, config *cfg.CayleyConfig) { + r := httprouter.New() + var templates = template.Must(template.ParseGlob("templates/*.tmpl")) + templates.ParseGlob("templates/*.html") + root := &TemplateRequestHandler{templates: templates} + docs := &DocRequestHandler{} + api := &Api{config: config, ts: ts} + api.ApiV1(r) + + //m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true})) + //r.Handler("GET", "/static", http.StripPrefix("/static", http.FileServer(http.Dir("static/")))) + r.GET("/docs/:docpage", docs.ServeHTTP) + r.GET("/ui/:ui_type", root.ServeHTTP) + r.GET("/", root.ServeHTTP) + http.Handle("/static/", http.StripPrefix("/static", http.FileServer(http.Dir("static/")))) + http.Handle("/", r) +} + +func CayleyHTTP(ts graph.TripleStore, config *cfg.CayleyConfig) { + SetupRoutes(ts, config) + glog.Infof("Cayley now listening on %s:%s\n", config.ListenHost, config.ListenPort) + 
fmt.Printf("Cayley now listening on %s:%s\n", config.ListenHost, config.ListenPort) + err := http.ListenAndServe(fmt.Sprintf("%s:%s", config.ListenHost, config.ListenPort), nil) + if err != nil { + glog.Fatal("ListenAndServe: ", err) + } +} diff --git a/http/cayley-http_test.go b/http/cayley-http_test.go new file mode 100644 index 0000000..d59749f --- /dev/null +++ b/http/cayley-http_test.go @@ -0,0 +1,53 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "testing" + . 
"github.com/smartystreets/goconvey/convey" +) + +func TestParseJSONOkay(t *testing.T) { + Convey("Parse JSON", t, func() { + bytelist := []byte(`[ + {"subject": "foo", "predicate": "bar", "object": "baz"}, + {"subject": "foo", "predicate": "bar", "object": "baz", "provenance": "graph"} + ]`) + x, err := ParseJsonToTripleList(bytelist) + So(err, ShouldBeNil) + So(len(x), ShouldEqual, 2) + So(x[0].Sub, ShouldEqual, "foo") + So(x[0].Provenance, ShouldEqual, "") + So(x[1].Provenance, ShouldEqual, "graph") + }) + + Convey("Parse JSON extra field", t, func() { + bytelist := []byte(`[ + {"subject": "foo", "predicate": "bar", "object": "foo", "something_else": "extra data"} + ]`) + _, err := ParseJsonToTripleList(bytelist) + So(err, ShouldBeNil) + }) +} + +func TestParseJSONFail(t *testing.T) { + Convey("Parse JSON Fail", t, func() { + bytelist := []byte(`[ + {"subject": "foo", "predicate": "bar"} + ]`) + _, err := ParseJsonToTripleList(bytelist) + So(err, ShouldNotBeNil) + }) +} diff --git a/init.go b/init.go new file mode 100644 index 0000000..54a66cd --- /dev/null +++ b/init.go @@ -0,0 +1,40 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cayley + +import ( + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph/leveldb" + "github.com/google/cayley/graph/mongo" +) + +func CayleyInit(config *cfg.CayleyConfig, triplePath string) bool { + created := false + dbpath := config.DatabasePath + switch config.DatabaseType { + case "mongo", "mongodb": + created = mongo.CreateNewMongoGraph(dbpath, config.DatabaseOptions) + case "leveldb": + created = leveldb.CreateNewLevelDB(dbpath) + case "mem": + return true + } + if created && triplePath != "" { + ts := OpenTSFromConfig(config) + CayleyLoad(ts, config, triplePath, true) + ts.Close() + } + return created +} diff --git a/load.go b/load.go new file mode 100644 index 0000000..0d00e6d --- /dev/null +++ b/load.go @@ -0,0 +1,81 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cayley + +import ( + "os" + + "github.com/barakmich/glog" + + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/mongo" + "github.com/google/cayley/nquads" +) + +func CayleyLoad(ts graph.TripleStore, config *cfg.CayleyConfig, triplePath string, firstTime bool) { + switch config.DatabaseType { + case "mongo", "mongodb": + if firstTime { + loadMongo(ts.(*mongo.MongoTripleStore), triplePath) + } else { + LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) + } + case "rethink", "rethinkdb": + LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) + case "leveldb": + LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) + case "mem": + LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) + } + +} + +func loadMongo(ts *mongo.MongoTripleStore, path string) { + tChan := make(chan *graph.Triple) + go ReadTriplesFromFile(tChan, path) + ts.BulkLoad(tChan) +} + +func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) { + f, err := os.Open(tripleFile) + if err != nil { + glog.Fatalln("Couldn't open file", tripleFile) + } + + defer func() { + if err := f.Close(); err != nil { + glog.Fatalln(err) + } + }() + + nquads.ReadNQuadsFromReader(c, f) +} + +func LoadTriplesFromFileInto(ts graph.TripleStore, filename string, loadSize int) { + tChan := make(chan *graph.Triple) + go ReadTriplesFromFile(tChan, filename) + tripleblock := make([]*graph.Triple, loadSize) + i := 0 + for t := range tChan { + tripleblock[i] = t + i++ + if i == loadSize { + ts.AddTripleSet(tripleblock) + i = 0 + } + } + ts.AddTripleSet(tripleblock[0:i]) +} diff --git a/make.sh b/make.sh deleted file mode 100755 index 395a277..0000000 --- a/make.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2014 The Cayley Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -cd "`dirname '$0'`" -SCRIPTPATH="`pwd`" -cd - > /dev/null - -export GOPATH=$SCRIPTPATH -export GOBIN= - -function deps { -echo "Fetching dependencies to $SCRIPTPATH..." -printf " (00/15)\r" - go get -u -t github.com/smartystreets/goconvey -printf "# (01/15)\r" - go get -u github.com/badgerodon/peg -printf "## (02/15)\r" - go get -u github.com/barakmich/glog -printf "#### (03/15)\r" - go get -u github.com/julienschmidt/httprouter -printf "##### (04/15)\r" - go get -u github.com/petar/GoLLRB/llrb -printf "###### (05/15)\r" - go get -u github.com/robertkrimen/otto -printf "####### (06/15)\r" - go get -u github.com/stretchrcom/testify -printf "######## (07/15)\r" - go get -u github.com/syndtr/goleveldb/leveldb -printf "######### (08/15)\r" - go get -u github.com/syndtr/goleveldb/leveldb/cache -printf "########## (09/15)\r" - go get -u github.com/syndtr/goleveldb/leveldb/iterator -printf "########### (10/15)\r" - go get -u github.com/syndtr/goleveldb/leveldb/opt -printf "############ (11/15)\r" - go get -u github.com/syndtr/goleveldb/leveldb/util -printf "############# (12/15)\r" - go get -u labix.org/v2/mgo -printf "############## (13/15)\r" - go get -u labix.org/v2/mgo/bson -printf "############### (14/15)\r" - go get -u github.com/russross/blackfriday -printf "################ (15/15)\r" -printf "\n" -} - -function build { - go build cayley -} - -$1 diff --git a/mql/mql-build-iterator.go b/mql/mql-build-iterator.go new file mode 100644 index 0000000..6273696 --- /dev/null +++ b/mql/mql-build-iterator.go @@ -0,0 +1,181 @@ +// Copyright 2014 The 
Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mql + +import ( + "errors" + "fmt" + "log" + "math" + "strings" + + "github.com/google/cayley/graph" +) + +func (m *MqlQuery) buildFixed(s string) graph.Iterator { + f := m.ses.ts.MakeFixed() + f.AddValue(m.ses.ts.GetIdFor(s)) + return f +} + +func (m *MqlQuery) buildResultIterator(path MqlPath) graph.Iterator { + all := m.ses.ts.GetNodesAllIterator() + all.AddTag(string(path)) + return graph.NewOptionalIterator(all) +} + +func (m *MqlQuery) BuildIteratorTree(query interface{}) { + m.isRepeated = make(map[MqlPath]bool) + m.queryStructure = make(map[MqlPath]map[string]interface{}) + m.queryResult = make(map[MqlResultPath]map[string]interface{}) + m.queryResult[""] = make(map[string]interface{}) + + m.it, m.err = m.buildIteratorTreeInternal(query, NewMqlPath()) + if m.err != nil { + m.isError = true + } +} + +func (m *MqlQuery) buildIteratorTreeInternal(query interface{}, path MqlPath) (graph.Iterator, error) { + var it graph.Iterator + var err error + err = nil + switch t := query.(type) { + case bool: + // for JSON booleans + // Treat the bool as a string and call it a day. + // Things which are really bool-like are special cases and will be dealt with separately. + if t { + it = m.buildFixed("true") + } + it = m.buildFixed("false") + case float64: + // for JSON numbers + // Damn you, Javascript, and your lack of integer values. 
+ if math.Floor(t) == t { + // Treat it like an integer. + it = m.buildFixed(fmt.Sprintf("%d", t)) + } else { + it = m.buildFixed(fmt.Sprintf("%f", t)) + } + case string: + // for JSON strings + it = m.buildFixed(t) + case []interface{}: + // for JSON arrays + m.isRepeated[path] = true + if len(t) == 0 { + it = m.buildResultIterator(path) + } else if len(t) == 1 { + it, err = m.buildIteratorTreeInternal(t[0], path) + } else { + err = errors.New(fmt.Sprintf("Multiple fields at location root%s", path.DisplayString())) + } + case map[string]interface{}: + // for JSON objects + it, err = m.buildIteratorTreeMapInternal(t, path) + case nil: + it = m.buildResultIterator(path) + default: + log.Fatal("Unknown JSON type?", query) + } + if err != nil { + return nil, err + } + it.AddTag(string(path)) + return it, nil +} + +func (m *MqlQuery) buildIteratorTreeMapInternal(query map[string]interface{}, path MqlPath) (graph.Iterator, error) { + it := graph.NewAndIterator() + it.AddSubIterator(m.ses.ts.GetNodesAllIterator()) + var err error + err = nil + outputStructure := make(map[string]interface{}) + for key, subquery := range query { + outputStructure[key] = nil + reverse := false + pred := key + if strings.HasPrefix(pred, "@") { + i := strings.Index(pred, ":") + if i != -1 { + pred = pred[(i + 1):] + } + } + if strings.HasPrefix(pred, "!") { + reverse = true + pred = strings.TrimPrefix(pred, "!") + } + + // Other special constructs here + var subit graph.Iterator + if key == "id" { + subit, err = m.buildIteratorTreeInternal(subquery, path.Follow(key)) + if err != nil { + return nil, err + } + it.AddSubIterator(subit) + } else { + subit, err = m.buildIteratorTreeInternal(subquery, path.Follow(key)) + if err != nil { + return nil, err + } + subAnd := graph.NewAndIterator() + predFixed := m.ses.ts.MakeFixed() + predFixed.AddValue(m.ses.ts.GetIdFor(pred)) + subAnd.AddSubIterator(graph.NewLinksToIterator(m.ses.ts, predFixed, "p")) + if reverse { + lto := 
graph.NewLinksToIterator(m.ses.ts, subit, "s") + subAnd.AddSubIterator(lto) + hasa := graph.NewHasaIterator(m.ses.ts, subAnd, "o") + it.AddSubIterator(hasa) + } else { + lto := graph.NewLinksToIterator(m.ses.ts, subit, "o") + subAnd.AddSubIterator(lto) + hasa := graph.NewHasaIterator(m.ses.ts, subAnd, "s") + it.AddSubIterator(hasa) + } + } + } + if err != nil { + return nil, err + } + m.queryStructure[path] = outputStructure + return it, nil +} + +type MqlResultPathSlice []MqlResultPath + +func (sl MqlResultPathSlice) Len() int { + return len(sl) +} + +func (sl MqlResultPathSlice) Less(i, j int) bool { + iLen := len(strings.Split(string(sl[i]), "\x30")) + jLen := len(strings.Split(string(sl[j]), "\x30")) + if iLen < jLen { + return true + } + if iLen == jLen { + if len(string(sl[i])) < len(string(sl[j])) { + return true + } + } + return false +} + +func (sl MqlResultPathSlice) Swap(i, j int) { + sl[i], sl[j] = sl[j], sl[i] +} diff --git a/mql/mql-fill.go b/mql/mql-fill.go new file mode 100644 index 0000000..26de32a --- /dev/null +++ b/mql/mql-fill.go @@ -0,0 +1,114 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mql + +import ( + "sort" + + "github.com/google/cayley/graph" +) + +func (m *MqlQuery) treeifyResult(tags map[string]graph.TSVal) map[MqlResultPath]string { + // Transform the map into something a little more interesting. 
+ results := make(map[MqlPath]string) + for k, v := range tags { + results[MqlPath(k)] = m.ses.ts.GetNameFor(v) + } + resultPaths := make(map[MqlResultPath]string) + for k, v := range results { + resultPaths[k.ToResultPathFromMap(results)] = v + } + + var paths MqlResultPathSlice + + for path, _ := range resultPaths { + paths = append(paths, path) + } + + sort.Sort(paths) + + // Build Structure + for _, path := range paths { + currentPath := path.getPath() + value := resultPaths[path] + namePath := path.AppendValue(value) + if _, ok := m.queryResult[namePath]; !ok { + targetPath, key := path.splitLastPath() + if path == "" { + targetPath, key = "", value + if _, ok := m.queryResult[""][value]; !ok { + m.resultOrder = append(m.resultOrder, value) + } + } + if _, ok := m.queryStructure[currentPath]; ok { + // If there's substructure, then copy that in. + newStruct := m.copyPathStructure(currentPath) + if m.isRepeated[currentPath] && currentPath != "" { + switch t := m.queryResult[targetPath][key].(type) { + case nil: + x := make([]interface{}, 0) + x = append(x, newStruct) + m.queryResult[targetPath][key] = x + m.queryResult[namePath] = newStruct + case []interface{}: + m.queryResult[targetPath][key] = append(t, newStruct) + m.queryResult[namePath] = newStruct + } + + } else { + m.queryResult[namePath] = newStruct + m.queryResult[targetPath][key] = newStruct + } + } + } + } + + // Fill values + for _, path := range paths { + currentPath := path.getPath() + value := resultPaths[path] + namePath := path.AppendValue(value) + if _, ok := m.queryStructure[currentPath]; ok { + // We're dealing with ids. + if _, ok := m.queryResult[namePath]["id"]; ok { + m.queryResult[namePath]["id"] = value + } + } else { + // Just a value. 
+ targetPath, key := path.splitLastPath() + if m.isRepeated[currentPath] { + switch t := m.queryResult[targetPath][key].(type) { + case nil: + x := make([]interface{}, 0) + x = append(x, value) + m.queryResult[targetPath][key] = x + case []interface{}: + m.queryResult[targetPath][key] = append(t, value) + } + + } else { + m.queryResult[targetPath][key] = value + } + } + } + + return resultPaths +} + +func (m *MqlQuery) buildResults() { + for _, v := range m.resultOrder { + m.results = append(m.results, m.queryResult[""][v]) + } +} diff --git a/mql/mql-functional_test.go b/mql/mql-functional_test.go new file mode 100644 index 0000000..97c2eac --- /dev/null +++ b/mql/mql-functional_test.go @@ -0,0 +1,264 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mql + +import ( + "encoding/json" + "testing" + + . 
"github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph/memstore" +) + +// +---+ +---+ +// | A |------- ->| F |<-- +// +---+ \------>+---+-/ +---+ \--+---+ +// ------>|#B#| | | E | +// +---+-------/ >+---+ | +---+ +// | C | / v +// +---+ -/ +---+ +// ---- +---+/ |#G#| +// \-->|#D#|------------->+---+ +// +---+ +// + +func buildTripleStore() *MqlSession { + ts := memstore.MakeTestingMemstore() + return NewMqlSession(ts) +} + +func compareJsonInterfaces(actual interface{}, expected interface{}, path MqlPath, t *testing.T) { + isError := false + switch ex := expected.(type) { + case bool: + switch ac := actual.(type) { + case bool: + if ac != ex { + isError = true + } + default: + t.Log("Mismatched type") + isError = true + } + case float64: + switch ac := actual.(type) { + case float64: + if ac != ex { + isError = true + } + default: + t.Log("Mismatched type") + isError = true + } + case string: + switch ac := actual.(type) { + case string: + if ac != ex { + isError = true + } + default: + isError = true + } + case []interface{}: + switch ac := actual.(type) { + case []interface{}: + if len(ac) != len(ex) { + t.Log("Different lengths") + isError = true + } else { + for i, elem := range ex { + compareJsonInterfaces(ac[i], elem, path.Follow(string(i)), t) + } + } + default: + t.Log("Mismatched type") + isError = true + } + case map[string]interface{}: + switch ac := actual.(type) { + case map[string]interface{}: + for k, v := range ex { + actual_value, ok := ac[k] + if !ok { + t.Log("Key", k, "not in actual output.") + isError = true + } else { + compareJsonInterfaces(actual_value, v, path.Follow(string(k)), t) + } + } + default: + t.Log("Mismatched type") + isError = true + } + case nil: + switch ac := actual.(type) { + case nil: + if ac != ex { + isError = true + } + default: + t.Log("Mismatched type") + isError = true + } + default: + t.Error("Unknown JSON type?", expected) + } + + if isError { + actual_bytes, _ := json.MarshalIndent(actual, 
"", " ") + expected_bytes, _ := json.MarshalIndent(expected, "", " ") + t.Error(path.DisplayString(), ":\n", string(actual_bytes), "\nexpected", string(expected_bytes)) + } +} + +func runAndTestQuery(query string, expected string, t *testing.T) { + ses := buildTripleStore() + c := make(chan interface{}, 5) + go ses.ExecInput(query, c, -1) + for result := range c { + ses.BuildJson(result) + } + actual_struct, _ := ses.GetJson() + var expected_struct interface{} + json.Unmarshal([]byte(expected), &expected_struct) + compareJsonInterfaces(actual_struct, expected_struct, NewMqlPath(), t) + ses.ClearJson() +} + +func TestGetAllIds(t *testing.T) { + Convey("Should get all IDs in the database", t, func() { + query := ` + [{"id": null}] + ` + expected := ` + [ + {"id": "A"}, + {"id": "follows"}, + {"id": "B"}, + {"id": "C"}, + {"id": "D"}, + {"id": "F"}, + {"id": "G"}, + {"id": "E"}, + {"id": "status"}, + {"id": "cool"}, + {"id": "status_graph"} + ] + ` + runAndTestQuery(query, expected, t) + }) +} + +func TestGetCool(t *testing.T) { + query := ` + [{"id": null, "status": "cool"}] + ` + expected := ` + [ + {"id": "B", "status": "cool"}, + {"id": "D", "status": "cool"}, + {"id": "G", "status": "cool"} + ] + ` + runAndTestQuery(query, expected, t) +} + +func TestGetFollowsList(t *testing.T) { + query := ` + [{"id": "C", "follows": []}] + ` + expected := ` + [{ + "id": "C", + "follows": [ + "B", "D" + ] + }] + ` + runAndTestQuery(query, expected, t) +} + +func TestGetFollowsStruct(t *testing.T) { + query := ` + [{"id": null, "follows": {"id": null, "status": "cool"}}] + ` + expected := ` + [ + {"id": "A", "follows": {"id": "B", "status": "cool"}}, + {"id": "C", "follows": {"id": "D", "status": "cool"}}, + {"id": "D", "follows": {"id": "G", "status": "cool"}}, + {"id": "F", "follows": {"id": "G", "status": "cool"}} + ] + ` + runAndTestQuery(query, expected, t) +} + +func TestGetFollowsReverseStructList(t *testing.T) { + query := ` + [{"id": null, "!follows": [{"id": null, 
"status" : "cool"}]}] + ` + expected := ` + [ + {"id": "F", "!follows": [{"id": "B", "status": "cool"}]}, + {"id": "B", "!follows": [{"id": "D", "status": "cool"}]}, + {"id": "G", "!follows": [{"id": "D", "status": "cool"}]} + ] + ` + runAndTestQuery(query, expected, t) +} + +func TestGetRevFollowsList(t *testing.T) { + query := ` + [{"id": "F", "!follows": []}] + ` + expected := ` + [{ + "id": "F", + "!follows": [ + "B", "E" + ] + }] + ` + runAndTestQuery(query, expected, t) +} + +func TestCoFollows(t *testing.T) { + query := ` + [{"id": null, "@A:follows": "B", "@B:follows": "D"}] + ` + expected := ` + [{ + "id": "C", + "@A:follows": "B", + "@B:follows": "D" + }] + ` + runAndTestQuery(query, expected, t) +} + +func TestRevCoFollows(t *testing.T) { + query := ` + [{"id": null, "!follows": {"id": "C"}, "@a:!follows": "D"}] + ` + expected := ` + [{ + "id": "B", + "!follows": {"id": "C"}, + "@a:!follows": "D" + }] + ` + runAndTestQuery(query, expected, t) +} diff --git a/mql/mql-query.go b/mql/mql-query.go new file mode 100644 index 0000000..66d8179 --- /dev/null +++ b/mql/mql-query.go @@ -0,0 +1,111 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// MqlPath identifies a position inside the query structure: every followed
// key is prefixed with the "\x1E" separator.
type MqlPath string

// MqlResultPath identifies a position inside a concrete result: it
// alternates node values and followed keys, joined with "\x1E".
type MqlResultPath string

// NewMqlPath returns the empty (root) query path.
func NewMqlPath() MqlPath {
	return ""
}

// Follow returns the path reached by descending from p through key s.
func (p MqlPath) Follow(s string) MqlPath {
	next := fmt.Sprintf("%s\x1E%s", p, s)
	return MqlPath(next)
}

// DisplayString renders the path with "." in place of the separator.
func (p MqlPath) DisplayString() string {
	pieces := strings.Split(string(p), "\x1E")
	return strings.Join(pieces, ".")
}

// NewMqlResultPath returns the empty (root) result path.
func NewMqlResultPath() MqlResultPath {
	return ""
}

// FollowPath extends p with the value found at p followed by the key
// followPiece. On the empty path no leading separator is emitted.
func (p MqlResultPath) FollowPath(followPiece string, value string) MqlResultPath {
	if string(p) != "" {
		return MqlResultPath(fmt.Sprintf("%s\x1E%s\x1E%s", p, value, followPiece))
	}
	return MqlResultPath(fmt.Sprintf("%s\x1E%s", value, followPiece))
}

// getPath drops the values from a result path, keeping only the keys (the
// pieces at odd positions), and returns them as a query path.
func (p MqlResultPath) getPath() MqlPath {
	pieces := strings.Split(string(p), "\x1E")
	out := NewMqlPath()
	for idx := 1; idx < len(pieces); idx += 2 {
		out = out.Follow(pieces[idx])
	}
	return out
}

// splitLastPath separates the final piece from everything before it. A path
// without a separator yields an empty prefix and the whole path as the piece.
func (p MqlResultPath) splitLastPath() (MqlResultPath, string) {
	raw := string(p)
	cut := strings.LastIndex(raw, "\x1E")
	if cut < 0 {
		return MqlResultPath(""), raw
	}
	return MqlResultPath(raw[:cut]), raw[cut+1:]
}

// AppendValue appends a node value to p; on the empty path the value alone
// is returned.
func (p MqlResultPath) AppendValue(value string) MqlResultPath {
	if string(p) != "" {
		return MqlResultPath(fmt.Sprintf("%s\x1E%s", p, value))
	}
	return MqlResultPath(value)
}

// ToResultPathFromMap expands a query path into a result path by inserting,
// before every key, the value that resultMap records for the query path
// walked so far.
func (p MqlPath) ToResultPathFromMap(resultMap map[MqlPath]string) MqlResultPath {
	result := NewMqlResultPath()
	walked := NewMqlPath()
	// The first split piece is the empty string before the leading separator.
	keys := strings.Split(string(p), "\x1E")[1:]
	for k := 0; k < len(keys); k++ {
		result = result.FollowPath(keys[k], resultMap[walked])
		walked = walked.Follow(keys[k])
	}
	return result
}
+ +package mql + +import ( + "encoding/json" + "fmt" + "sort" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" +) + +type MqlSession struct { + ts graph.TripleStore + currentQuery *MqlQuery + debug bool +} + +func NewMqlSession(ts graph.TripleStore) *MqlSession { + var m MqlSession + m.ts = ts + return &m +} + +func (m *MqlSession) ToggleDebug() { + m.debug = !m.debug +} + +func (m *MqlSession) GetQuery(input string, output_struct chan map[string]interface{}) { + defer close(output_struct) + var mqlQuery interface{} + err := json.Unmarshal([]byte(input), &mqlQuery) + if err != nil { + return + } + m.currentQuery = NewMqlQuery(m) + m.currentQuery.BuildIteratorTree(mqlQuery) + output := make(map[string]interface{}) + graph.OutputQueryShapeForIterator(m.currentQuery.it, m.ts, &output) + nodes := output["nodes"].([]graph.Node) + new_nodes := make([]graph.Node, 0) + for _, n := range nodes { + n.Tags = nil + new_nodes = append(new_nodes, n) + } + output["nodes"] = new_nodes + output_struct <- output +} + +func (m *MqlSession) InputParses(input string) (graph.ParseResult, error) { + var x interface{} + err := json.Unmarshal([]byte(input), &x) + if err != nil { + return graph.ParseFail, err + } + return graph.Parsed, nil +} + +func (m *MqlSession) ExecInput(input string, c chan interface{}, limit int) { + defer close(c) + var mqlQuery interface{} + err := json.Unmarshal([]byte(input), &mqlQuery) + if err != nil { + return + } + m.currentQuery = NewMqlQuery(m) + m.currentQuery.BuildIteratorTree(mqlQuery) + if m.currentQuery.isError { + return + } + it, _ := m.currentQuery.it.Optimize() + if glog.V(2) { + glog.V(2).Infoln(it.DebugString(0)) + } + for { + _, ok := it.Next() + if !ok { + break + } + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + c <- &tags + for it.NextResult() == true { + tags := make(map[string]graph.TSVal) + it.TagResults(&tags) + c <- &tags + } + } +} + +func (m *MqlSession) ToText(result interface{}) string { + tags := 
*(result.(*map[string]graph.TSVal)) + out := fmt.Sprintln("****") + tagKeys := make([]string, len(tags)) + m.currentQuery.treeifyResult(tags) + m.currentQuery.buildResults() + r, _ := json.MarshalIndent(m.currentQuery.results, "", " ") + fmt.Println(string(r)) + i := 0 + for k, _ := range tags { + tagKeys[i] = string(k) + i++ + } + sort.Strings(tagKeys) + for _, k := range tagKeys { + if k == "$_" { + continue + } + out += fmt.Sprintf("%s : %s\n", k, m.ts.GetNameFor(tags[k])) + } + return out +} + +func (m *MqlSession) BuildJson(result interface{}) { + m.currentQuery.treeifyResult(*(result.(*map[string]graph.TSVal))) +} + +func (m *MqlSession) GetJson() (interface{}, error) { + m.currentQuery.buildResults() + if m.currentQuery.isError { + return nil, m.currentQuery.err + } else { + return m.currentQuery.results, nil + } +} + +func (m *MqlSession) ClearJson() { + // Since we create a new MqlQuery underneath every query, clearing isn't necessary. + return +} diff --git a/nquads/nquads.go b/nquads/nquads.go new file mode 100644 index 0000000..f4032b3 --- /dev/null +++ b/nquads/nquads.go @@ -0,0 +1,196 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package nquads + +import ( + "bufio" + "io" + "strings" + + "github.com/barakmich/glog" + + "github.com/google/cayley/graph" +) + +func isWhitespace(s uint8) bool { + return (s == '\t' || s == '\r' || s == ' ') +} +func ParseLineToTriple(str string) *graph.Triple { + // Skip leading whitespace. + str = skipWhitespace(str) + // Check for a comment + if str != "" && str[0] == '#' { + return nil + } + sub, remainder := getTripleComponent(str) + if sub == nil { + return nil + } + str = skipWhitespace(remainder) + pred, remainder := getTripleComponent(str) + if pred == nil { + return nil + } + str = skipWhitespace(remainder) + obj, remainder := getTripleComponent(str) + if obj == nil { + return nil + } + str = skipWhitespace(remainder) + prov_ptr, remainder := getTripleComponent(str) + var prov string + if prov_ptr == nil { + prov = "" + } else { + prov = *prov_ptr + } + str = skipWhitespace(remainder) + if str != "" && str[0] == '.' { + return graph.MakeTriple(*sub, *pred, *obj, prov) + } + return nil +} + +func skipWhitespace(str string) string { + i := 0 + for i < len(str) && isWhitespace(str[i]) { + i += 1 + } + return str[i:] +} + +func getTripleComponent(str string) (*string, string) { + if len(str) == 0 { + return nil, str + } + if str[0] == '<' { + return getUriPart(str[1:]) + } else if str[0] == '"' { + return getQuotedPart(str[1:]) + } else if str[0] == '.' { + return nil, str + } else { + // Technically not part of the spec. But we do it anyway for convenience. 
+ return getUnquotedPart(str) + } +} + +func getUriPart(str string) (*string, string) { + i := 0 + for i < len(str) && str[i] != '>' { + i += 1 + } + if i == len(str) { + return nil, str + } + part := str[0:i] + return &part, str[i+1:] +} + +func getQuotedPart(str string) (*string, string) { + i := 0 + start := 0 + out := "" + for i < len(str) && str[i] != '"' { + if str[i] == '\\' { + out += str[start:i] + switch str[i+1] { + case '\\': + out += "\\" + case 'r': + out += "\r" + case 'n': + out += "\n" + case 't': + out += "\t" + case '"': + out += "\"" + default: + return nil, str + } + i += 2 + start = i + continue + } + i += 1 + } + if i == len(str) { + return nil, str + } + out += str[start:i] + i += 1 + var remainder string + if strings.HasPrefix(str[i:], "^^<") { + // Ignore type, for now + _, remainder = getUriPart(str[i+3:]) + } else if strings.HasPrefix(str[i:], "@") { + _, remainder = getUnquotedPart(str[i+1:]) + } else { + remainder = str[i:] + } + + return &out, remainder +} + +func getUnquotedPart(str string) (*string, string) { + i := 0 + initStr := str + out := "" + start := 0 + for i < len(str) && !isWhitespace(str[i]) { + if str[i] == '"' { + part, remainder := getQuotedPart(str[i+1:]) + if part == nil { + return part, initStr + } + out += str[start:i] + str = remainder + i = 0 + start = 0 + out += *part + } + i += 1 + } + out += str[start:i] + return &out, str[i:] +} + +func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) { + bf := bufio.NewReader(reader) + + nTriples := 0 + line := "" + for { + l, pre, err := bf.ReadLine() + if err == io.EOF { + break + } + if err != nil { + glog.Fatalln("Something bad happened while reading file " + err.Error()) + } + line += string(l) + if pre { + continue + } + triple := ParseLineToTriple(line) + line = "" + if triple != nil { + nTriples++ + c <- triple + } + } + glog.Infoln("Read", nTriples, "triples") + close(c) +} diff --git a/nquads/nquads_test.go b/nquads/nquads_test.go new file mode 100644 
index 0000000..f5b61ee --- /dev/null +++ b/nquads/nquads_test.go @@ -0,0 +1,131 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nquads + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + + "github.com/google/cayley/graph" +) + +func TestParsingNTriples(t *testing.T) { + Convey("When parsing", t, func() { + Convey("It should not parse invalid triples", func() { + x := ParseLineToTriple("invalid") + So(x, ShouldBeNil) + }) + Convey("It should not parse comments", func() { + x := ParseLineToTriple("# nominally valid triple .") + So(x, ShouldBeNil) + }) + Convey("It should parse simple triples", func() { + x := ParseLineToTriple("this is valid .") + So(x, ShouldNotBeNil) + So(x.Sub, ShouldEqual, "this") + }) + Convey("It should parse quoted triples", func() { + x := ParseLineToTriple("this is \"valid too\" .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "valid too") + So(x.Provenance, ShouldEqual, "") + }) + Convey("It should parse escaped quoted triples", func() { + x := ParseLineToTriple("he said \"\\\"That's all folks\\\"\" .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "\"That's all folks\"") + So(x.Provenance, ShouldEqual, "") + }) + + Convey("It should parse an example real triple", func() { + x := ParseLineToTriple("\":/guid/9202a8c04000641f80000000010c843c\" \"name\" \"George Morris\" .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "George 
Morris") + So(x.Provenance, ShouldEqual, "") + }) + + Convey("It should parse a pathologically spaced triple", func() { + x := ParseLineToTriple("foo is \"\\tA big tough\\r\\nDeal\\\\\" .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "\tA big tough\r\nDeal\\") + So(x.Provenance, ShouldEqual, "") + }) + + Convey("It should parse a simple quad", func() { + x := ParseLineToTriple("this is valid quad .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "valid") + So(x.Provenance, ShouldEqual, "quad") + }) + + Convey("It should parse a quoted quad", func() { + x := ParseLineToTriple("this is valid \"quad thing\" .") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "valid") + So(x.Provenance, ShouldEqual, "quad thing") + }) + + Convey("It should parse crazy escaped quads", func() { + x := ParseLineToTriple("\"\\\"this\" \"\\\"is\" \"\\\"valid\" \"\\\"quad thing\".") + So(x, ShouldNotBeNil) + So(x.Sub, ShouldEqual, "\"this") + So(x.Pred, ShouldEqual, "\"is") + So(x.Obj, ShouldEqual, "\"valid") + So(x.Provenance, ShouldEqual, "\"quad thing") + }) + }) +} + +func TestParsingNTriplesOfficial(t *testing.T) { + Convey("When using some public test cases...", t, func() { + Convey("It should handle some simple cases with comments", func() { + var x *graph.Triple + x = ParseLineToTriple(" . # comment") + So(x, ShouldNotBeNil) + So(x.Sub, ShouldEqual, "http://example/s") + So(x.Pred, ShouldEqual, "http://example/p") + So(x.Obj, ShouldEqual, "http://example/o") + So(x.Provenance, ShouldEqual, "") + x = ParseLineToTriple(" _:o . # comment") + So(x, ShouldNotBeNil) + So(x.Sub, ShouldEqual, "http://example/s") + So(x.Pred, ShouldEqual, "http://example/p") + So(x.Obj, ShouldEqual, "_:o") + So(x.Provenance, ShouldEqual, "") + x = ParseLineToTriple(" \"o\" . # comment") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "o") + So(x.Provenance, ShouldEqual, "") + x = ParseLineToTriple(" \"o\"^^ . 
# comment") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "o") + So(x.Provenance, ShouldEqual, "") + x = ParseLineToTriple(" \"o\"@en . # comment") + So(x, ShouldNotBeNil) + So(x.Obj, ShouldEqual, "o") + So(x.Provenance, ShouldEqual, "") + }) + }) +} + +func BenchmarkParser(b *testing.B) { + for n := 0; n < b.N; n++ { + x := ParseLineToTriple(" \"object of some real\\tlength\"@en . # comment") + if x.Obj != "object of some real\tlength" { + b.Fail() + } + } +} diff --git a/open.go b/open.go new file mode 100644 index 0000000..442fa9a --- /dev/null +++ b/open.go @@ -0,0 +1,40 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cayley + +import ( + "github.com/barakmich/glog" + + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/leveldb" + "github.com/google/cayley/graph/memstore" + "github.com/google/cayley/graph/mongo" +) + +func OpenTSFromConfig(config *cfg.CayleyConfig) graph.TripleStore { + glog.Infof("Opening database \"%s\" at %s", config.DatabaseType, config.DatabasePath) + switch config.DatabaseType { + case "mongo", "mongodb": + return mongo.NewMongoTripleStore(config.DatabasePath, config.DatabaseOptions) + case "leveldb": + return leveldb.NewDefaultLevelDBTripleStore(config.DatabasePath, config.DatabaseOptions) + case "mem": + ts := memstore.NewMemTripleStore() + CayleyLoad(ts, config, config.DatabasePath, true) + return ts + } + panic("Unsupported database backend " + config.DatabaseType) +} diff --git a/repl.go b/repl.go new file mode 100644 index 0000000..08a4d0e --- /dev/null +++ b/repl.go @@ -0,0 +1,143 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cayley + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" + "time" + + cfg "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/graph/sexp" + "github.com/google/cayley/gremlin" + "github.com/google/cayley/mql" + "github.com/google/cayley/nquads" +) + +func trace(s string) (string, time.Time) { + return s, time.Now() +} + +func un(s string, startTime time.Time) { + endTime := time.Now() + + fmt.Printf(s, float64(endTime.UnixNano()-startTime.UnixNano())/float64(1E6)) +} + +func RunQuery(query string, ses graph.Session) { + nResults := 0 + startTrace, startTime := trace("Elapsed time: %g ms\n\n") + defer func() { + if nResults > 0 { + un(startTrace, startTime) + } + }() + fmt.Printf("\n") + c := make(chan interface{}, 5) + go ses.ExecInput(query, c, 100) + for res := range c { + fmt.Print(ses.ToText(res)) + nResults++ + } + if nResults > 0 { + fmt.Printf("-----------\n%d Results\n", nResults) + } +} + +func CayleyRepl(ts graph.TripleStore, queryLanguage string, config *cfg.CayleyConfig) { + var ses graph.Session + switch queryLanguage { + case "sexp": + ses = sexp.NewSexpSession(ts) + case "mql": + ses = mql.NewMqlSession(ts) + case "gremlin": + fallthrough + default: + ses = gremlin.NewGremlinSession(ts, config.GremlinTimeout, true) + } + inputBf := bufio.NewReader(os.Stdin) + line := "" + for { + if line == "" { + fmt.Print("cayley> ") + } else { + fmt.Print("... 
") + } + l, pre, err := inputBf.ReadLine() + if err == io.EOF { + if line != "" { + line = "" + } else { + break + } + } + if err != nil { + line = "" + } + if pre { + panic("Line too long") + } + line += string(l) + if line == "" { + continue + } + if strings.HasPrefix(line, ":debug") { + ses.ToggleDebug() + fmt.Println("Debug Toggled") + line = "" + continue + } + if strings.HasPrefix(line, ":a") { + var tripleStmt = line[3:] + triple := nquads.ParseLineToTriple(tripleStmt) + if triple == nil { + fmt.Println("Not a valid triple.") + line = "" + continue + } + ts.AddTriple(triple) + line = "" + continue + } + if strings.HasPrefix(line, ":d") { + var tripleStmt = line[3:] + triple := nquads.ParseLineToTriple(tripleStmt) + if triple == nil { + fmt.Println("Not a valid triple.") + line = "" + continue + } + ts.RemoveTriple(triple) + line = "" + continue + } + result, err := ses.InputParses(line) + switch result { + case graph.Parsed: + RunQuery(line, ses) + line = "" + case graph.ParseFail: + fmt.Println("Error: ", err) + line = "" + case graph.ParseMore: + default: + } + } +} diff --git a/src/cayley/main.go b/src/cayley/main.go deleted file mode 100644 index c2354d9..0000000 --- a/src/cayley/main.go +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - cayley "cayley_cmd" - cfg "cayley_config" - cayley_http "cayley_http" - "flag" - "fmt" - "github.com/barakmich/glog" - "graph" - "os" - "runtime" -) - -var tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") -var cpuprofile = flag.String("prof", "", "Output profiling file.") -var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") -var configFile = flag.String("config", "", "Path to an explicit configuration file.") - -func Usage() { - fmt.Println("Cayley is a graph store and graph query layer.\n") - fmt.Println("Usage:") - fmt.Println(" cayley COMMAND [flags]\n") - fmt.Println("Commands:") - fmt.Println(" init\tCreate an empty database.") - fmt.Println(" load\tBulk-load a triple file into the database.") - fmt.Println(" http\tServe an HTTP endpoint on the given host and port.") - fmt.Println(" repl\tDrop into a REPL of the given query language.") - fmt.Println("\nFlags:") - flag.Parse() - flag.PrintDefaults() -} - -func main() { - // No command? It's time for usage. - if len(os.Args) == 1 { - Usage() - os.Exit(1) - } - cmd := os.Args[1] - newargs := make([]string, 0) - newargs = append(newargs, os.Args[0]) - newargs = append(newargs, os.Args[2:]...) 
- os.Args = newargs - flag.Parse() - var ts graph.TripleStore - config := cfg.ParseConfigFromFlagsAndFile(*configFile) - if os.Getenv("GOMAXPROCS") == "" { - runtime.GOMAXPROCS(runtime.NumCPU()) - glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU()) - } else { - glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting") - } - switch cmd { - case "init": - cayley.CayleyInit(config, *tripleFile) - case "load": - ts = cayley.OpenTSFromConfig(config) - cayley.CayleyLoad(ts, config, *tripleFile, false) - ts.Close() - case "repl": - ts = cayley.OpenTSFromConfig(config) - cayley.CayleyRepl(ts, *queryLanguage, config) - ts.Close() - case "http": - ts = cayley.OpenTSFromConfig(config) - cayley_http.CayleyHTTP(ts, config) - ts.Close() - default: - fmt.Println("No command", cmd) - flag.Usage() - } -} diff --git a/src/cayley_cmd/cayley-init.go b/src/cayley_cmd/cayley-init.go deleted file mode 100644 index 74226c3..0000000 --- a/src/cayley_cmd/cayley-init.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cayley - -import ( - cfg "cayley_config" - "graph_leveldb" - "graph_mongo" -) - -func CayleyInit(config *cfg.CayleyConfig, triplePath string) bool { - created := false - dbpath := config.DatabasePath - switch config.DatabaseType { - case "mongo", "mongodb": - created = graph_mongo.CreateNewMongoGraph(dbpath, config.DatabaseOptions) - case "leveldb": - created = graph_leveldb.CreateNewLevelDB(dbpath) - case "mem": - return true - } - if created && triplePath != "" { - ts := OpenTSFromConfig(config) - CayleyLoad(ts, config, triplePath, true) - ts.Close() - } - return created -} diff --git a/src/cayley_cmd/cayley-load.go b/src/cayley_cmd/cayley-load.go deleted file mode 100644 index 19b242c..0000000 --- a/src/cayley_cmd/cayley-load.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cayley - -import ( - cfg "cayley_config" - "github.com/barakmich/glog" - "graph" - "graph_mongo" - "nquads" - "os" -) - -func CayleyLoad(ts graph.TripleStore, config *cfg.CayleyConfig, triplePath string, firstTime bool) { - switch config.DatabaseType { - case "mongo", "mongodb": - if firstTime { - loadMongo(ts.(*graph_mongo.MongoTripleStore), triplePath) - } else { - LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) - } - case "rethink", "rethinkdb": - LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) - case "leveldb": - LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) - case "mem": - LoadTriplesFromFileInto(ts, triplePath, config.LoadSize) - } - -} - -func loadMongo(ts *graph_mongo.MongoTripleStore, path string) { - tChan := make(chan *graph.Triple) - go ReadTriplesFromFile(tChan, path) - ts.BulkLoad(tChan) -} - -func ReadTriplesFromFile(c chan *graph.Triple, tripleFile string) { - f, err := os.Open(tripleFile) - if err != nil { - glog.Fatalln("Couldn't open file", tripleFile) - } - - defer func() { - if err := f.Close(); err != nil { - glog.Fatalln(err) - } - }() - - nquads.ReadNQuadsFromReader(c, f) -} - -func LoadTriplesFromFileInto(ts graph.TripleStore, filename string, loadSize int) { - tChan := make(chan *graph.Triple) - go ReadTriplesFromFile(tChan, filename) - tripleblock := make([]*graph.Triple, loadSize) - i := 0 - for t := range tChan { - tripleblock[i] = t - i++ - if i == loadSize { - ts.AddTripleSet(tripleblock) - i = 0 - } - } - ts.AddTripleSet(tripleblock[0:i]) -} diff --git a/src/cayley_cmd/cayley-open.go b/src/cayley_cmd/cayley-open.go deleted file mode 100644 index c262571..0000000 --- a/src/cayley_cmd/cayley-open.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley - -import ( - "graph" - - cfg "cayley_config" - "github.com/barakmich/glog" - "graph_leveldb" - "graph_memstore" - "graph_mongo" -) - -func OpenTSFromConfig(config *cfg.CayleyConfig) graph.TripleStore { - glog.Infof("Opening database \"%s\" at %s", config.DatabaseType, config.DatabasePath) - switch config.DatabaseType { - case "mongo", "mongodb": - return graph_mongo.NewMongoTripleStore(config.DatabasePath, config.DatabaseOptions) - case "leveldb": - return graph_leveldb.NewDefaultLevelDBTripleStore(config.DatabasePath, config.DatabaseOptions) - case "mem": - ts := graph_memstore.NewMemTripleStore() - CayleyLoad(ts, config, config.DatabasePath, true) - return ts - } - panic("Unsupported database backend " + config.DatabaseType) -} diff --git a/src/cayley_cmd/cayley-repl.go b/src/cayley_cmd/cayley-repl.go deleted file mode 100644 index 9c4757a..0000000 --- a/src/cayley_cmd/cayley-repl.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley - -import ( - "bufio" - cfg "cayley_config" - "fmt" - "graph" - sexp "graph_sexp" - "gremlin" - "io" - "mql" - "nquads" - "os" - "strings" - "time" -) - -func trace(s string) (string, time.Time) { - return s, time.Now() -} - -func un(s string, startTime time.Time) { - endTime := time.Now() - - fmt.Printf(s, float64(endTime.UnixNano()-startTime.UnixNano())/float64(1E6)) -} - -func RunQuery(query string, ses graph.Session) { - nResults := 0 - startTrace, startTime := trace("Elapsed time: %g ms\n\n") - defer func() { - if nResults > 0 { - un(startTrace, startTime) - } - }() - fmt.Printf("\n") - c := make(chan interface{}, 5) - go ses.ExecInput(query, c, 100) - for res := range c { - fmt.Print(ses.ToText(res)) - nResults++ - } - if nResults > 0 { - fmt.Printf("-----------\n%d Results\n", nResults) - } -} - -func CayleyRepl(ts graph.TripleStore, queryLanguage string, config *cfg.CayleyConfig) { - var ses graph.Session - switch queryLanguage { - case "sexp": - ses = sexp.NewSexpSession(ts) - case "mql": - ses = mql.NewMqlSession(ts) - case "gremlin": - fallthrough - default: - ses = gremlin.NewGremlinSession(ts, config.GremlinTimeout, true) - } - inputBf := bufio.NewReader(os.Stdin) - line := "" - for { - if line == "" { - fmt.Print("cayley> ") - } else { - fmt.Print("... 
") - } - l, pre, err := inputBf.ReadLine() - if err == io.EOF { - if line != "" { - line = "" - } else { - break - } - } - if err != nil { - line = "" - } - if pre { - panic("Line too long") - } - line += string(l) - if line == "" { - continue - } - if strings.HasPrefix(line, ":debug") { - ses.ToggleDebug() - fmt.Println("Debug Toggled") - line = "" - continue - } - if strings.HasPrefix(line, ":a") { - var tripleStmt = line[3:] - triple := nquads.ParseLineToTriple(tripleStmt) - if triple == nil { - fmt.Println("Not a valid triple.") - line = "" - continue - } - ts.AddTriple(triple) - line = "" - continue - } - if strings.HasPrefix(line, ":d") { - var tripleStmt = line[3:] - triple := nquads.ParseLineToTriple(tripleStmt) - if triple == nil { - fmt.Println("Not a valid triple.") - line = "" - continue - } - ts.RemoveTriple(triple) - line = "" - continue - } - result, err := ses.InputParses(line) - switch result { - case graph.Parsed: - RunQuery(line, ses) - line = "" - case graph.ParseFail: - fmt.Println("Error: ", err) - line = "" - case graph.ParseMore: - default: - } - } -} diff --git a/src/cayley_config/cayley-config.go b/src/cayley_config/cayley-config.go deleted file mode 100644 index 8e19d6c..0000000 --- a/src/cayley_config/cayley-config.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cayley_config - -import ( - "encoding/json" - "flag" - "github.com/barakmich/glog" - "os" -) - -type CayleyConfig struct { - DatabaseType string `json:"database"` - DatabasePath string `json:"db_path"` - DatabaseOptions map[string]interface{} `json:"db_options"` - ListenHost string `json:"listen_host"` - ListenPort string `json:"listen_port"` - ReadOnly bool `json:"read_only"` - GremlinTimeout int `json:"gremlin_timeout"` - LoadSize int `json:"load_size"` -} - -var databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.") -var databaseBackend = flag.String("db", "mem", "Database Backend.") -var host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).") -var loadSize = flag.Int("load_size", 10000, "Size of triplesets to load") -var port = flag.String("port", "64210", "Port to listen on.") -var readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.") -var gremlinTimeout = flag.Int("gremlin_timeout", 30, "Number of seconds until an individual query times out.") - -func ParseConfigFromFile(filename string) *CayleyConfig { - config := &CayleyConfig{} - if filename == "" { - return config - } - f, err := os.Open(filename) - if err != nil { - glog.Fatalln("Couldn't open config file", filename) - } - - defer f.Close() - - dec := json.NewDecoder(f) - err = dec.Decode(config) - if err != nil { - glog.Fatalln("Couldn't read config file:", err) - } - return config -} - -func ParseConfigFromFlagsAndFile(fileFlag string) *CayleyConfig { - // Find the file... 
- var trueFilename string - if fileFlag != "" { - if _, err := os.Stat(fileFlag); os.IsNotExist(err) { - glog.Fatalln("Cannot find specified configuration file", fileFlag, ", aborting.") - } else { - trueFilename = fileFlag - } - } else { - if _, err := os.Stat(os.Getenv("CAYLEY_CFG")); err == nil { - trueFilename = os.Getenv("CAYLEY_CFG") - } else { - if _, err := os.Stat("/etc/cayley.cfg"); err == nil { - trueFilename = "/etc/cayley.cfg" - } - } - } - if trueFilename == "" { - glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.") - } - config := ParseConfigFromFile(trueFilename) - - if config.DatabasePath == "" { - config.DatabasePath = *databasePath - } - - if config.DatabaseType == "" { - config.DatabaseType = *databaseBackend - } - - if config.ListenHost == "" { - config.ListenHost = *host - } - - if config.ListenPort == "" { - config.ListenPort = *port - } - - if config.GremlinTimeout == 0 { - config.GremlinTimeout = *gremlinTimeout - } - - if config.LoadSize == 0 { - config.LoadSize = *loadSize - } - - config.ReadOnly = config.ReadOnly || *readOnly - - return config -} diff --git a/src/cayley_http/cayley-http-docs.go b/src/cayley_http/cayley-http-docs.go deleted file mode 100644 index d90e8d9..0000000 --- a/src/cayley_http/cayley-http-docs.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cayley_http - -import ( - "fmt" - "github.com/julienschmidt/httprouter" - "github.com/russross/blackfriday" - "io/ioutil" - "net/http" - "os" -) - -type DocRequestHandler struct { -} - -func MarkdownWithCSS(input []byte, title string) []byte { - // set up the HTML renderer - htmlFlags := 0 - htmlFlags |= blackfriday.HTML_USE_XHTML - htmlFlags |= blackfriday.HTML_USE_SMARTYPANTS - htmlFlags |= blackfriday.HTML_SMARTYPANTS_FRACTIONS - htmlFlags |= blackfriday.HTML_SMARTYPANTS_LATEX_DASHES - htmlFlags |= blackfriday.HTML_COMPLETE_PAGE - renderer := blackfriday.HtmlRenderer(htmlFlags, title, markdownCSS) - - // set up the parser - extensions := 0 - //extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS - extensions |= blackfriday.EXTENSION_TABLES - extensions |= blackfriday.EXTENSION_FENCED_CODE - extensions |= blackfriday.EXTENSION_AUTOLINK - extensions |= blackfriday.EXTENSION_STRIKETHROUGH - //extensions |= blackfriday.EXTENSION_SPACE_HEADERS - extensions |= blackfriday.EXTENSION_HEADER_IDS - extensions |= blackfriday.EXTENSION_LAX_HTML_BLOCKS - - return blackfriday.Markdown(input, renderer, extensions) -} - -func (h *DocRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, params httprouter.Params) { - docpage := params.ByName("docpage") - if docpage == "" { - docpage = "Index" - } - file, err := os.Open(fmt.Sprintf("docs/%s.md", docpage)) - if err != nil { - http.Error(w, err.Error(), http.StatusNotFound) - return - } - data, err := ioutil.ReadAll(file) - if err != nil { - http.Error(w, err.Error(), http.StatusNoContent) - return - } - output := MarkdownWithCSS(data, fmt.Sprintf("Cayley Docs - %s", docpage)) - fmt.Fprint(w, string(output)) -} - -var markdownCSS = "/static/css/docs.css" diff --git a/src/cayley_http/cayley-http-query.go b/src/cayley_http/cayley-http-query.go deleted file mode 100644 index c3a91e6..0000000 --- a/src/cayley_http/cayley-http-query.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2014 The Cayley Authors. 
All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley_http - -import ( - "encoding/json" - "fmt" - "github.com/julienschmidt/httprouter" - "graph" - "gremlin" - "io/ioutil" - "mql" - "net/http" -) - -type SuccessQueryWrapper struct { - Result interface{} `json:"result"` -} - -type ErrorQueryWrapper struct { - Error string `json:"error"` -} - -func WrapErrResult(err error) ([]byte, error) { - var wrap ErrorQueryWrapper - wrap.Error = err.Error() - return json.MarshalIndent(wrap, "", " ") -} - -func WrapResult(result interface{}) ([]byte, error) { - var wrap SuccessQueryWrapper - wrap.Result = result - return json.MarshalIndent(wrap, "", " ") -} - -func RunJsonQuery(query string, ses graph.HttpSession) (interface{}, error) { - c := make(chan interface{}, 5) - go ses.ExecInput(query, c, 100) - for res := range c { - ses.BuildJson(res) - } - return ses.GetJson() -} - -func GetQueryShape(query string, ses graph.HttpSession) ([]byte, error) { - c := make(chan map[string]interface{}, 5) - go ses.GetQuery(query, c) - var data map[string]interface{} - for res := range c { - data = res - } - return json.Marshal(data) -} - -// TODO(barakmich): Turn this into proper middleware. 
-func (api *Api) ServeV1Query(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { - var ses graph.HttpSession - switch params.ByName("query_lang") { - case "gremlin": - ses = gremlin.NewGremlinSession(api.ts, api.config.GremlinTimeout, false) - case "mql": - ses = mql.NewMqlSession(api.ts) - default: - return FormatJson400(w, "Need a query language.") - } - var err error - bodyBytes, err := ioutil.ReadAll(r.Body) - if err != nil { - return FormatJson400(w, err) - } - code := string(bodyBytes) - result, err := ses.InputParses(code) - switch result { - case graph.Parsed: - var output interface{} - var bytes []byte - var err error - output, err = RunJsonQuery(code, ses) - if err != nil { - bytes, err = WrapErrResult(err) - http.Error(w, string(bytes), 400) - ses = nil - return 400 - } - bytes, err = WrapResult(output) - if err != nil { - ses = nil - return FormatJson400(w, err) - } - fmt.Fprint(w, string(bytes)) - ses = nil - return 200 - case graph.ParseFail: - ses = nil - return FormatJson400(w, err) - default: - ses = nil - return FormatJsonError(w, 500, "Incomplete data?") - } - http.Error(w, "", http.StatusNotFound) - ses = nil - return http.StatusNotFound -} - -func (api *Api) ServeV1Shape(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { - var ses graph.HttpSession - switch params.ByName("query_lang") { - case "gremlin": - ses = gremlin.NewGremlinSession(api.ts, api.config.GremlinTimeout, false) - case "mql": - ses = mql.NewMqlSession(api.ts) - default: - return FormatJson400(w, "Need a query language.") - } - var err error - bodyBytes, err := ioutil.ReadAll(r.Body) - if err != nil { - return FormatJson400(w, err) - } - code := string(bodyBytes) - result, err := ses.InputParses(code) - switch result { - case graph.Parsed: - var output []byte - var err error - output, err = GetQueryShape(code, ses) - if err != nil { - return FormatJson400(w, err) - } - fmt.Fprint(w, string(output)) - return 200 - case graph.ParseFail: - 
return FormatJson400(w, err) - default: - return FormatJsonError(w, 500, "Incomplete data?") - } - http.Error(w, "", http.StatusNotFound) - return http.StatusNotFound -} diff --git a/src/cayley_http/cayley-http-write.go b/src/cayley_http/cayley-http-write.go deleted file mode 100644 index a6fdcb6..0000000 --- a/src/cayley_http/cayley-http-write.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley_http - -import ( - "encoding/json" - "errors" - "fmt" - "github.com/barakmich/glog" - "github.com/julienschmidt/httprouter" - "graph" - "io/ioutil" - "net/http" - "nquads" - "strconv" -) - -func ParseJsonToTripleList(jsonBody []byte) ([]*graph.Triple, error) { - var tripleList []*graph.Triple - err := json.Unmarshal(jsonBody, &tripleList) - if err != nil { - return nil, err - } - for i, t := range tripleList { - if !t.IsValid() { - return nil, errors.New(fmt.Sprintf("Invalid triple at index %d. 
%s", i, t.ToString())) - } - } - return tripleList, nil -} - -func (api *Api) ServeV1Write(w http.ResponseWriter, r *http.Request, _ httprouter.Params) int { - if api.config.ReadOnly { - return FormatJson400(w, "Database is read-only.") - } - bodyBytes, err := ioutil.ReadAll(r.Body) - if err != nil { - return FormatJson400(w, err) - } - tripleList, terr := ParseJsonToTripleList(bodyBytes) - if terr != nil { - return FormatJson400(w, terr) - } - api.ts.AddTripleSet(tripleList) - fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", len(tripleList)) - return 200 -} - -func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { - if api.config.ReadOnly { - return FormatJson400(w, "Database is read-only.") - } - - formFile, _, err := r.FormFile("NQuadFile") - if err != nil { - glog.Errorln(err) - return FormatJsonError(w, 500, "Couldn't read file: "+err.Error()) - } - - defer formFile.Close() - - blockSize, blockErr := strconv.ParseInt(r.URL.Query().Get("block_size"), 10, 64) - if blockErr != nil { - blockSize = int64(api.config.LoadSize) - } - - tChan := make(chan *graph.Triple) - go nquads.ReadNQuadsFromReader(tChan, formFile) - tripleblock := make([]*graph.Triple, blockSize) - nTriples := 0 - i := int64(0) - for t := range tChan { - tripleblock[i] = t - i++ - nTriples++ - if i == blockSize { - api.ts.AddTripleSet(tripleblock) - i = 0 - } - } - api.ts.AddTripleSet(tripleblock[0:i]) - fmt.Fprintf(w, "{\"result\": \"Successfully wrote %d triples.\"}", nTriples) - return 200 -} - -func (api *Api) ServeV1Delete(w http.ResponseWriter, r *http.Request, params httprouter.Params) int { - if api.config.ReadOnly { - return FormatJson400(w, "Database is read-only.") - } - bodyBytes, err := ioutil.ReadAll(r.Body) - if err != nil { - return FormatJson400(w, err) - } - tripleList, terr := ParseJsonToTripleList(bodyBytes) - if terr != nil { - return FormatJson400(w, terr) - } - count := 0 - for _, triple := range tripleList 
{ - api.ts.RemoveTriple(triple) - count++ - } - fmt.Fprintf(w, "{\"result\": \"Successfully deleted %d triples.\"}", count) - return 200 -} diff --git a/src/cayley_http/cayley-http.go b/src/cayley_http/cayley-http.go deleted file mode 100644 index 35acab1..0000000 --- a/src/cayley_http/cayley-http.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley_http - -import ( - cfg "cayley_config" - "fmt" - "github.com/barakmich/glog" - "github.com/julienschmidt/httprouter" - "graph" - "html/template" - "net/http" - "time" -) - -type ResponseHandler func(http.ResponseWriter, *http.Request, httprouter.Params) int - -func LogRequest(handler ResponseHandler) httprouter.Handle { - return func(w http.ResponseWriter, req *http.Request, params httprouter.Params) { - start := time.Now() - addr := req.Header.Get("X-Real-IP") - if addr == "" { - addr = req.Header.Get("X-Forwarded-For") - if addr == "" { - addr = req.RemoteAddr - } - } - glog.Infof("Started %s %s for %s", req.Method, req.URL.Path, addr) - code := handler(w, req, params) - glog.Infof("Completed %v %s %s in %v", code, http.StatusText(code), req.URL.Path, time.Since(start)) - - } -} - -func FormatJson400(w http.ResponseWriter, err interface{}) int { - return FormatJsonError(w, 400, err) -} - -func FormatJsonError(w http.ResponseWriter, code int, err interface{}) int { - http.Error(w, fmt.Sprintf("{\"error\" 
: \"%s\"}", err), code) - return code -} - -type TemplateRequestHandler struct { - templates *template.Template -} - -func (h *TemplateRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, params httprouter.Params) { - uiType := params.ByName("ui_type") - if r.URL.Path == "/" { - uiType = "query" - } - err := h.templates.ExecuteTemplate(w, uiType+".html", h) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - } -} - -type Api struct { - config *cfg.CayleyConfig - ts graph.TripleStore -} - -func (api *Api) ApiV1(r *httprouter.Router) { - r.POST("/api/v1/query/:query_lang", LogRequest(api.ServeV1Query)) - r.POST("/api/v1/shape/:query_lang", LogRequest(api.ServeV1Shape)) - r.POST("/api/v1/write", LogRequest(api.ServeV1Write)) - r.POST("/api/v1/write/file/nquad", LogRequest(api.ServeV1WriteNQuad)) - //TODO(barakmich): /write/text/nquad, which reads from request.body instead of HTML5 file form? - r.POST("/api/v1/delete", LogRequest(api.ServeV1Delete)) -} - -func SetupRoutes(ts graph.TripleStore, config *cfg.CayleyConfig) { - r := httprouter.New() - var templates = template.Must(template.ParseGlob("templates/*.tmpl")) - templates.ParseGlob("templates/*.html") - root := &TemplateRequestHandler{templates: templates} - docs := &DocRequestHandler{} - api := &Api{config: config, ts: ts} - api.ApiV1(r) - - //m.Use(martini.Static("static", martini.StaticOptions{Prefix: "/static", SkipLogging: true})) - //r.Handler("GET", "/static", http.StripPrefix("/static", http.FileServer(http.Dir("static/")))) - r.GET("/docs/:docpage", docs.ServeHTTP) - r.GET("/ui/:ui_type", root.ServeHTTP) - r.GET("/", root.ServeHTTP) - http.Handle("/static/", http.StripPrefix("/static", http.FileServer(http.Dir("static/")))) - http.Handle("/", r) -} - -func CayleyHTTP(ts graph.TripleStore, config *cfg.CayleyConfig) { - SetupRoutes(ts, config) - glog.Infof("Cayley now listening on %s:%s\n", config.ListenHost, config.ListenPort) - fmt.Printf("Cayley now listening on 
%s:%s\n", config.ListenHost, config.ListenPort) - err := http.ListenAndServe(fmt.Sprintf("%s:%s", config.ListenHost, config.ListenPort), nil) - if err != nil { - glog.Fatal("ListenAndServe: ", err) - } -} diff --git a/src/cayley_http/cayley-http_test.go b/src/cayley_http/cayley-http_test.go deleted file mode 100644 index 941b6da..0000000 --- a/src/cayley_http/cayley-http_test.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cayley_http - -import ( - . 
"github.com/smartystreets/goconvey/convey" - "testing" -) - -func TestParseJSONOkay(t *testing.T) { - Convey("Parse JSON", t, func() { - bytelist := []byte(`[ - {"subject": "foo", "predicate": "bar", "object": "baz"}, - {"subject": "foo", "predicate": "bar", "object": "baz", "provenance": "graph"} - ]`) - x, err := ParseJsonToTripleList(bytelist) - So(err, ShouldBeNil) - So(len(x), ShouldEqual, 2) - So(x[0].Sub, ShouldEqual, "foo") - So(x[0].Provenance, ShouldEqual, "") - So(x[1].Provenance, ShouldEqual, "graph") - }) - - Convey("Parse JSON extra field", t, func() { - bytelist := []byte(`[ - {"subject": "foo", "predicate": "bar", "object": "foo", "something_else": "extra data"} - ]`) - _, err := ParseJsonToTripleList(bytelist) - So(err, ShouldBeNil) - }) -} - -func TestParseJSONFail(t *testing.T) { - Convey("Parse JSON Fail", t, func() { - bytelist := []byte(`[ - {"subject": "foo", "predicate": "bar"} - ]`) - _, err := ParseJsonToTripleList(bytelist) - So(err, ShouldNotBeNil) - }) -} diff --git a/src/graph/all-iterator.go b/src/graph/all-iterator.go deleted file mode 100644 index 6068746..0000000 --- a/src/graph/all-iterator.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the All iterator. Which, logically -// enough, represents all nodes or all links in the graph. 
-// -// This particular file is actually vestigal. It's up to the TripleStore to give -// us an All iterator that represents all things in the graph. So this is -// really the All iterator for the MemTripleStore. That said, it *is* one of -// the base iterators, and it helps just to see it here. - -import ( - "fmt" - "strings" -) - -// An All iterator across a range of int64 values, from `max` to `min`. -type Int64AllIterator struct { - BaseIterator - max, min int64 - at int64 -} - -// Creates a new Int64AllIterator with the given range. -func NewInt64AllIterator(min, max int64) *Int64AllIterator { - var all Int64AllIterator - BaseIteratorInit(&all.BaseIterator) - all.max = max - all.min = min - all.at = min - return &all -} - -// Start back at the beginning -func (a *Int64AllIterator) Reset() { - a.at = a.min -} - -func (a *Int64AllIterator) Close() { -} - -func (a *Int64AllIterator) Clone() Iterator { - out := NewInt64AllIterator(a.min, a.max) - out.CopyTagsFrom(a) - return out -} - -// Prints the All iterator as just an "all". -func (a *Int64AllIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s)", strings.Repeat(" ", indent), a.Type()) -} - -// Next() on an Int64 all iterator is a simple incrementing counter. -// Return the next integer, and mark it as the result. -func (a *Int64AllIterator) Next() (TSVal, bool) { - NextLogIn(a) - if a.at == -1 { - return NextLogOut(a, nil, false) - } - val := a.at - a.at = a.at + 1 - if a.at > a.max { - a.at = -1 - } - a.Last = val - return NextLogOut(a, val, true) -} - -// The number of elements in an Int64AllIterator is the size of the range. -// The size is exact. -func (a *Int64AllIterator) Size() (int64, bool) { - Size := ((a.max - a.min) + 1) - return Size, true -} - -// Check() for an Int64AllIterator is merely seeing if the passed value is -// withing the range, assuming the value is an int64. 
-func (a *Int64AllIterator) Check(tsv TSVal) bool { - CheckLogIn(a, tsv) - v := tsv.(int64) - if a.min <= v && v <= a.max { - a.Last = v - return CheckLogOut(a, v, true) - } - return CheckLogOut(a, v, false) -} - -// The type of this iterator is an "all". This is important, as it puts it in -// the class of "all iterators. -func (a *Int64AllIterator) Type() string { return "all" } - -// There's nothing to optimize about this little iterator. -func (a *Int64AllIterator) Optimize() (Iterator, bool) { return a, false } - -// Stats for an Int64AllIterator are simple. Super cheap to do any operation, -// and as big as the range. -func (a *Int64AllIterator) GetStats() *IteratorStats { - s, _ := a.Size() - return &IteratorStats{ - CheckCost: 1, - NextCost: 1, - Size: s, - } -} diff --git a/src/graph/and-iterator-optimize.go b/src/graph/and-iterator-optimize.go deleted file mode 100644 index 950d681..0000000 --- a/src/graph/and-iterator-optimize.go +++ /dev/null @@ -1,330 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Perhaps the most tricky file in this entire module. Really a method on the -// AndIterator, but important enough to deserve its own file. -// -// Calling Optimize() on an And iterator, like any iterator, requires that we -// preserve the underlying meaning. 
However, the And has many choices, namely, -// which one of it's subiterators will be the branch that does the Next()ing, -// and which ordering of the remaining iterators is the most efficient. In -// short, this is where a lot of the query optimization happens, and there are -// many wins to be had here, as well as many bad bugs. The worst class of bug -// changes the meaning of the query. The second worst class makes things really -// slow. -// -// The good news is this: If Optimize() is never called (turned off, perhaps) we can -// be sure the results are as good as the query language called for. -// -// In short, tread lightly. - -import ( - "container/list" -) - -// Optimizes the AndIterator, by picking the most efficient way to Next() and -// Check() its subiterators. For SQL fans, this is equivalent to JOIN. -func (and *AndIterator) Optimize() (Iterator, bool) { - // First, let's get the list of iterators, in order (first one is Next()ed, - // the rest are Check()ed) - oldItList := and.GetSubIterators() - - // And call Optimize() on our subtree, replacing each one in the order we - // found them. it_list is the newly optimized versions of these, and changed - // is another list, of only the ones that have returned replacements and - // changed. - itList := optimizeSubIterators(oldItList) - - // Close the replaced iterators (they ought to close themselves, but Close() - // is idempotent, so this just protects against any machinations). - closeIteratorList(oldItList, nil) - - // If we can find only one subiterator which is equivalent to this whole and, - // we can replace the And... - out := and.optimizeReplacement(itList) - if out != nil { - // ...Move the tags to the replacement... - moveTagsTo(out, and) - // ...Close everyone except `out`, our replacement... - closeIteratorList(itList, out) - // ...And return it. - return out, true - } - - // And now, without changing any of the iterators, we reorder them. 
it_list is - // now a permutation of itself, but the contents are unchanged. - itList = optimizeOrder(itList) - - // Okay! At this point we have an optimized order. - - // The easiest thing to do at this point is merely to create a new And iterator - // and replace ourselves with our (reordered, optimized) clone. - newAnd := NewAndIterator() - - // Add the subiterators in order. - for e := itList.Front(); e != nil; e = e.Next() { - newAnd.AddSubIterator(e.Value.(Iterator)) - } - - // Move the tags hanging on us (like any good replacement). - newAnd.CopyTagsFrom(and) - - newAnd.optimizeCheck() - - // And close ourselves but not our subiterators -- some may still be alive in - // the new And (they were unchanged upon calling Optimize() on them, at the - // start). - and.cleanUp() - return newAnd, true -} - -// Closes a list of iterators, except the one passed in `except`. Closes all -// of the iterators in the list if `except` is nil. -func closeIteratorList(l *list.List, except Iterator) { - for e := l.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - if it != except { - e.Value.(Iterator).Close() - } - } -} - -// Find if there is a single subiterator which is a valid replacement for this -// AndIterator. -func (and *AndIterator) optimizeReplacement(itList *list.List) Iterator { - // If we were created with no SubIterators, we're as good as Null. - if itList.Len() == 0 { - return &NullIterator{} - } - if itList.Len() == 1 { - // When there's only one iterator, there's only one choice. - return itList.Front().Value.(Iterator) - } - // If any of our subiterators, post-optimization, are also Null, then - // there's no point in continuing the branch, we will have no results - // and we are null as well. - if hasAnyNullIterators(itList) { - return &NullIterator{} - } - - // If we have one useful iterator, use that. 
- it := hasOneUsefulIterator(itList) - if it != nil { - return it - } - return nil -} - -// optimizeOrder(l) takes a list and returns a list, containing the same contents -// but with a new ordering, however it wishes. -func optimizeOrder(l *list.List) *list.List { - out := list.New() - var bestIt Iterator - bestCost := int64(1 << 62) - // bad contains iterators that can't be (efficiently) nexted, such as - // "optional" or "not". Separate them out and tack them on at the end. - bad := list.New() - - // Find the iterator with the projected "best" total cost. - // Total cost is defined as The Next()ed iterator's cost to Next() out - // all of it's contents, and to Check() each of those against everyone - // else. - for e := l.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - if !it.Nextable() { - bad.PushBack(it) - continue - } - rootStats := e.Value.(Iterator).GetStats() - projectedCost := rootStats.NextCost - for f := l.Front(); f != nil; f = f.Next() { - if !f.Value.(Iterator).Nextable() { - continue - } - if f == e { - continue - } - stats := f.Value.(Iterator).GetStats() - projectedCost += stats.CheckCost - } - projectedCost = projectedCost * rootStats.Size - if projectedCost < bestCost { - bestIt = it - bestCost = projectedCost - } - } - - // TODO(barakmich): Optimization of order need not stop here. Picking a smart - // Check() order based on probability of getting a false Check() first is - // useful (fail faster). - - // Put the best iterator (the one we wish to Next()) at the front... - out.PushBack(bestIt) - // ...And push everyone else after... - for e := l.Front(); e != nil; e = e.Next() { - thisIt := e.Value.(Iterator) - if !thisIt.Nextable() { - continue - } - if thisIt != bestIt { - out.PushBack(thisIt) - } - } - // ...And finally, the difficult children on the end. - out.PushBackList(bad) - return out -} - -// optimizeCheck(l) creates an alternate check list, containing the same contents -// but with a new ordering, however it wishes. 
-func (and *AndIterator) optimizeCheck() { - subIts := and.GetSubIterators() - out := list.New() - - // Find the iterator with the lowest Check() cost, push it to the front, repeat. - for subIts.Len() != 0 { - var best *list.Element - bestCost := int64(1 << 62) - for e := subIts.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - rootStats := it.GetStats() - projectedCost := rootStats.CheckCost - if projectedCost < bestCost { - best = e - bestCost = projectedCost - } - } - out.PushBack(best.Value) - subIts.Remove(best) - } - - and.checkList = out -} - -// If we're replacing ourselves by a single iterator, we need to grab the -// result tags from the iterators that, while still valid and would hold -// the same values as this and, are not going to stay. -// getSubTags() returns a map of the tags for all the subiterators. -func (and *AndIterator) getSubTags() map[string]bool { - subs := and.GetSubIterators() - tags := make(map[string]bool) - for e := subs.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - for _, tag := range it.Tags() { - tags[tag] = true - } - } - for _, tag := range and.Tags() { - tags[tag] = true - } - return tags -} - -// moveTagsTo() gets the tags for all of the And's subiterators and the -// And itself, and moves them to `out`. -func moveTagsTo(out Iterator, and *AndIterator) { - tagmap := and.getSubTags() - for _, tag := range out.Tags() { - if tagmap[tag] { - delete(tagmap, tag) - } - } - for k, _ := range tagmap { - out.AddTag(k) - } -} - -// optimizeSubIterators(l) takes a list of iterators and calls Optimize() on all -// of them. It returns two lists -- the first contains the same list as l, where -// any replacements are made by Optimize() and the second contains the originals -// which were replaced. 
-func optimizeSubIterators(l *list.List) *list.List { - itList := list.New() - for e := l.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - newIt, change := it.Optimize() - if change { - itList.PushBack(newIt) - } else { - itList.PushBack(it.Clone()) - } - } - return itList -} - -// Check a list of iterators for any Null iterators. -func hasAnyNullIterators(l *list.List) bool { - for e := l.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - if it.Type() == "null" { - return true - } - } - return false -} - -// There are two "not-useful" iterators -- namely "null" which returns -// nothing, and "all" which returns everything. Particularly, we want -// to see if we're intersecting with a bunch of "all" iterators, and, -// if we are, then we have only one useful iterator. -func hasOneUsefulIterator(l *list.List) Iterator { - usefulCount := 0 - var usefulIt Iterator - for e := l.Front(); e != nil; e = e.Next() { - it := e.Value.(Iterator) - switch it.Type() { - case "null", "all": - continue - case "optional": - // Optional is weird -- it's not useful, but we can't optimize - // away from it. Therefore, we skip this optimization - // if we see one. - return nil - default: - usefulCount++ - usefulIt = it - } - } - - if usefulCount == 1 { - return usefulIt - } - return nil -} - -// and.GetStats() lives here in and-iterator-optimize.go because it may -// in the future return different statistics based on how it is optimized. -// For now, however, it's pretty static. 
-func (and *AndIterator) GetStats() *IteratorStats { - primaryStats := and.primaryIt.GetStats() - CheckCost := primaryStats.CheckCost - NextCost := primaryStats.NextCost - Size := primaryStats.Size - for _, it := range and.internalIterators { - stats := it.GetStats() - NextCost += stats.CheckCost - CheckCost += stats.CheckCost - if Size > stats.Size { - Size = stats.Size - } - } - return &IteratorStats{ - CheckCost: CheckCost, - NextCost: NextCost, - Size: Size, - } - -} diff --git a/src/graph/and-iterator-optimize_test.go b/src/graph/and-iterator-optimize_test.go deleted file mode 100644 index 4ecee81..0000000 --- a/src/graph/and-iterator-optimize_test.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Tests relating to methods in and-iterator-optimize. Many are pretty simplistic, but -// nonetheless cover a lot of basic cases. 
- -import ( - "reflect" - "sort" - "testing" -) - -func TestIteratorPromotion(t *testing.T) { - all := NewInt64AllIterator(1, 3) - fixed := newFixedIterator() - fixed.AddValue(3) - a := NewAndIterator() - a.AddSubIterator(all) - a.AddSubIterator(fixed) - all.AddTag("a") - fixed.AddTag("b") - a.AddTag("c") - newIt, changed := a.Optimize() - if !changed { - t.Error("Iterator didn't optimize") - } - if newIt.Type() != "fixed" { - t.Error("Expected fixed iterator") - } - tagsExpected := []string{"a", "b", "c"} - tags := newIt.Tags() - sort.Strings(tags) - if !reflect.DeepEqual(tags, tagsExpected) { - t.Fatal("Tags don't match") - } -} - -func TestNullIteratorAnd(t *testing.T) { - all := NewInt64AllIterator(1, 3) - null := NewNullIterator() - a := NewAndIterator() - a.AddSubIterator(all) - a.AddSubIterator(null) - newIt, changed := a.Optimize() - if !changed { - t.Error("Didn't change") - } - if newIt.Type() != "null" { - t.Error("Expected null iterator, got ", newIt.Type()) - } -} - -func TestReorderWithTag(t *testing.T) { - all := NewInt64AllIterator(100, 300) - all.AddTag("good") - all2 := NewInt64AllIterator(1, 30000) - all2.AddTag("slow") - a := NewAndIterator() - // Make all2 the default iterator - a.AddSubIterator(all2) - a.AddSubIterator(all) - - newIt, changed := a.Optimize() - if !changed { - t.Error("Expected new iterator") - } - expectedTags := []string{"good", "slow"} - tagsOut := make([]string, 0) - l := newIt.GetSubIterators() - for e := l.Front(); e != nil; e = e.Next() { - for _, x := range e.Value.(Iterator).Tags() { - tagsOut = append(tagsOut, x) - } - } - if !reflect.DeepEqual(expectedTags, tagsOut) { - t.Fatal("Tags don't match") - } -} - -func TestAndStatistics(t *testing.T) { - all := NewInt64AllIterator(100, 300) - all.AddTag("good") - all2 := NewInt64AllIterator(1, 30000) - all2.AddTag("slow") - a := NewAndIterator() - // Make all2 the default iterator - a.AddSubIterator(all2) - a.AddSubIterator(all) - stats1 := a.GetStats() - newIt, changed := 
a.Optimize() - if !changed { - t.Error("Didn't optimize") - } - stats2 := newIt.GetStats() - if stats2.NextCost > stats1.NextCost { - t.Error("And didn't optimize. Next cost old ", stats1.NextCost, "and new ", stats2.NextCost) - } -} diff --git a/src/graph/and-iterator.go b/src/graph/and-iterator.go deleted file mode 100644 index a3458aa..0000000 --- a/src/graph/and-iterator.go +++ /dev/null @@ -1,248 +0,0 @@ -// Defines the And iterator, one of the base iterators. And requires no -// knowledge of the constituent TripleStore; its sole purpose is to act as an -// intersection operator across the subiterators it is given. If one iterator -// contains [1,3,5] and another [2,3,4] -- then And is an iterator that -// 'contains' [3] -// -// It accomplishes this in one of two ways. If it is a Next()ed iterator (that -// is, it is a top level iterator, or on the "Next() path", then it will Next() -// it's primary iterator (helpfully, and.primary_it) and Check() the resultant -// value against it's other iterators. If it matches all of them, then it -// returns that value. Otherwise, it repeats the process. -// -// If it's on a Check() path, it merely Check()s every iterator, and returns the -// logical AND of each result. - -package graph - -import ( - "container/list" - "fmt" - "strings" -) - -// The And iterator. Consists of a BaseIterator and a number of subiterators, the primary of which will -// be Next()ed if next is called. -type AndIterator struct { - BaseIterator - internalIterators []Iterator - itCount int - primaryIt Iterator - checkList *list.List -} - -// Creates a new And iterator. 
-func NewAndIterator() *AndIterator { - var and AndIterator - BaseIteratorInit(&and.BaseIterator) - and.internalIterators = make([]Iterator, 0, 20) - and.checkList = nil - return &and -} - -// Reset all internal iterators -func (and *AndIterator) Reset() { - and.primaryIt.Reset() - for _, it := range and.internalIterators { - it.Reset() - } - and.checkList = nil -} - -func (and *AndIterator) Clone() Iterator { - newAnd := NewAndIterator() - newAnd.AddSubIterator(and.primaryIt.Clone()) - newAnd.CopyTagsFrom(and) - for _, it := range and.internalIterators { - newAnd.AddSubIterator(it.Clone()) - } - if and.checkList != nil { - newAnd.optimizeCheck() - } - return newAnd -} - -// Returns a list.List of the subiterators, in order (primary iterator first). -func (and *AndIterator) GetSubIterators() *list.List { - l := list.New() - l.PushBack(and.primaryIt) - for _, it := range and.internalIterators { - l.PushBack(it) - } - return l -} - -// Overrides BaseIterator TagResults, as it needs to add it's own results and -// recurse down it's subiterators. -func (and *AndIterator) TagResults(out *map[string]TSVal) { - and.BaseIterator.TagResults(out) - if and.primaryIt != nil { - and.primaryIt.TagResults(out) - } - for _, it := range and.internalIterators { - it.TagResults(out) - } -} - -// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. -func (and *AndIterator) GetResultTree() *ResultTree { - tree := NewResultTree(and.LastResult()) - tree.AddSubtree(and.primaryIt.GetResultTree()) - for _, it := range and.internalIterators { - tree.AddSubtree(it.GetResultTree()) - } - return tree -} - -// Prints information about this iterator. 
-func (and *AndIterator) DebugString(indent int) string { - var total string - for i, it := range and.internalIterators { - total += strings.Repeat(" ", indent+2) - total += fmt.Sprintf("%d:\n%s\n", i, it.DebugString(indent+4)) - } - var tags string - for _, k := range and.Tags() { - tags += fmt.Sprintf("%s;", k) - } - spaces := strings.Repeat(" ", indent+2) - - return fmt.Sprintf("%s(%s %d\n%stags:%s\n%sprimary_it:\n%s\n%sother_its:\n%s)", - strings.Repeat(" ", indent), - and.Type(), - and.GetUid(), - spaces, - tags, - spaces, - and.primaryIt.DebugString(indent+4), - spaces, - total) -} - -// Add a subiterator to this And iterator. -// -// The first iterator that is added becomes the primary iterator. This is -// important. Calling Optimize() is the way to change the order based on -// subiterator statistics. Without Optimize(), the order added is the order -// used. -func (and *AndIterator) AddSubIterator(sub Iterator) { - if and.itCount > 0 { - and.internalIterators = append(and.internalIterators, sub) - and.itCount++ - return - } - and.primaryIt = sub - and.itCount++ -} - -// Returns the Next value from the And iterator. Because the And is the -// intersection of its subiterators, it must choose one subiterator to produce a -// candidate, and check this value against the subiterators. A productive choice -// of primary iterator is therefore very important. -func (and *AndIterator) Next() (TSVal, bool) { - NextLogIn(and) - var curr TSVal - var exists bool - for { - - curr, exists = and.primaryIt.Next() - if !exists { - return NextLogOut(and, nil, false) - } - if and.checkSubIts(curr) { - and.Last = curr - return NextLogOut(and, curr, true) - } - } - panic("Somehow broke out of Next() loop in AndIterator") -} - -// Checks a value against the non-primary iterators, in order. 
-func (and *AndIterator) checkSubIts(val TSVal) bool { - var subIsGood = true - for _, it := range and.internalIterators { - subIsGood = it.Check(val) - if !subIsGood { - break - } - } - return subIsGood -} - -func (and *AndIterator) checkCheckList(val TSVal) bool { - var isGood = true - for e := and.checkList.Front(); e != nil; e = e.Next() { - isGood = e.Value.(Iterator).Check(val) - if !isGood { - break - } - } - return CheckLogOut(and, val, isGood) -} - -// Check a value against the entire iterator, in order. -func (and *AndIterator) Check(val TSVal) bool { - CheckLogIn(and, val) - if and.checkList != nil { - return and.checkCheckList(val) - } - mainGood := and.primaryIt.Check(val) - if !mainGood { - return CheckLogOut(and, val, false) - } - othersGood := and.checkSubIts(val) - if !othersGood { - return CheckLogOut(and, val, false) - } - and.Last = val - return CheckLogOut(and, val, true) -} - -// Returns the approximate size of the And iterator. Because we're dealing -// with an intersection, we know that the largest we can be is the size of the -// smallest iterator. This is the heuristic we shall follow. Better heuristics -// welcome. -func (and *AndIterator) Size() (int64, bool) { - val, b := and.primaryIt.Size() - for _, it := range and.internalIterators { - newval, newb := it.Size() - if val > newval { - val = newval - } - b = newb && b - } - return val, b -} - -// An And has no NextResult of its own -- that is, there are no other values -// which satisfy our previous result that are not the result itself. Our -// subiterators might, however, so just pass the call recursively. -func (and *AndIterator) NextResult() bool { - if and.primaryIt.NextResult() { - return true - } - for _, it := range and.internalIterators { - if it.NextResult() { - return true - } - } - return false -} - -// Perform and-specific cleanup, of which there currently is none. -func (and *AndIterator) cleanUp() { -} - -// Close this iterator, and, by extension, close the subiterators. 
-// Close should be idempotent, and it follows that if it's subiterators -// follow this contract, the And follows the contract. -func (and *AndIterator) Close() { - and.cleanUp() - and.primaryIt.Close() - for _, it := range and.internalIterators { - it.Close() - } -} - -// Register this as an "and" iterator. -func (and *AndIterator) Type() string { return "and" } diff --git a/src/graph/and-iterator_test.go b/src/graph/and-iterator_test.go deleted file mode 100644 index d0fbf2e..0000000 --- a/src/graph/and-iterator_test.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "testing" -) - -// Make sure that tags work on the And. -func TestTag(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(234) - fix1.AddTag("foo") - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddTag("bar") - out := fix1.Tags() - if len(out) != 1 { - t.Errorf("Expected length 1, got %d", len(out)) - } - if out[0] != "foo" { - t.Errorf("Cannot get tag back, got %s", out[0]) - } - - val, ok := and.Next() - if !ok { - t.Errorf("And did not next") - } - if val != 234 { - t.Errorf("Unexpected value") - } - tags := make(map[string]TSVal) - and.TagResults(&tags) - if tags["bar"] != 234 { - t.Errorf("no bar tag") - } - if tags["foo"] != 234 { - t.Errorf("no foo tag") - } -} - -// Do a simple itersection of fixed values. 
-func TestAndAndFixedIterators(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(1) - fix1.AddValue(2) - fix1.AddValue(3) - fix1.AddValue(4) - fix2 := newFixedIterator() - fix2.AddValue(3) - fix2.AddValue(4) - fix2.AddValue(5) - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddSubIterator(fix2) - // Should be as big as smallest subiterator - size, accurate := and.Size() - if size != 3 { - t.Error("Incorrect size") - } - if !accurate { - t.Error("not accurate") - } - - val, ok := and.Next() - if val != 3 || ok == false { - t.Error("Incorrect first value") - } - - val, ok = and.Next() - if val != 4 || ok == false { - t.Error("Incorrect second value") - } - - val, ok = and.Next() - if ok { - t.Error("Too many values") - } - -} - -// If there's no intersection, the size should still report the same, -// but there should be nothing to Next() -func TestNonOverlappingFixedIterators(t *testing.T) { - fix1 := newFixedIterator() - fix1.AddValue(1) - fix1.AddValue(2) - fix1.AddValue(3) - fix1.AddValue(4) - fix2 := newFixedIterator() - fix2.AddValue(5) - fix2.AddValue(6) - fix2.AddValue(7) - and := NewAndIterator() - and.AddSubIterator(fix1) - and.AddSubIterator(fix2) - // Should be as big as smallest subiterator - size, accurate := and.Size() - if size != 3 { - t.Error("Incorrect size") - } - if !accurate { - t.Error("not accurate") - } - - _, ok := and.Next() - if ok { - t.Error("Too many values") - } - -} - -func TestAllIterators(t *testing.T) { - all1 := NewInt64AllIterator(1, 5) - all2 := NewInt64AllIterator(4, 10) - and := NewAndIterator() - and.AddSubIterator(all2) - and.AddSubIterator(all1) - - val, ok := and.Next() - if val.(int64) != 4 || ok == false { - t.Error("Incorrect first value") - } - - val, ok = and.Next() - if val.(int64) != 5 || ok == false { - t.Error("Incorrect second value") - } - - val, ok = and.Next() - if ok { - t.Error("Too many values") - } - -} diff --git a/src/graph/fixed-iterator.go b/src/graph/fixed-iterator.go deleted 
file mode 100644 index 7578611..0000000 --- a/src/graph/fixed-iterator.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the Fixed iterator. A fixed iterator is quite simple; it -// contains an explicit fixed array of values. -// -// A fixed iterator requires an Equality function to be passed to it, by reason that TSVal, the -// opaque Triple store value, may not answer to ==. - -import ( - "fmt" - "strings" -) - -// A Fixed iterator consists of it's values, an index (where it is in the process of Next()ing) and -// an equality function. -type FixedIterator struct { - BaseIterator - values []TSVal - lastIndex int - cmp Equality -} - -// Define the signature of an equality function. -type Equality func(a, b TSVal) bool - -// Define an equality function of purely ==, which works for native types. -func BasicEquality(a, b TSVal) bool { - if a == b { - return true - } - return false -} - -// Creates a new Fixed iterator based around == equality. -func newFixedIterator() *FixedIterator { - return NewFixedIteratorWithCompare(BasicEquality) -} - -// Creates a new Fixed iterator with a custom comparitor. 
-func NewFixedIteratorWithCompare(compareFn Equality) *FixedIterator { - var it FixedIterator - BaseIteratorInit(&it.BaseIterator) - it.values = make([]TSVal, 0, 20) - it.lastIndex = 0 - it.cmp = compareFn - return &it -} - -func (f *FixedIterator) Reset() { - f.lastIndex = 0 -} - -func (f *FixedIterator) Close() { -} - -func (f *FixedIterator) Clone() Iterator { - out := NewFixedIteratorWithCompare(f.cmp) - for _, val := range f.values { - out.AddValue(val) - } - out.CopyTagsFrom(f) - return out -} - -// Add a value to the iterator. The array now contains this value. -// TODO(barakmich): This ought to be a set someday, disallowing repeated values. -func (f *FixedIterator) AddValue(v TSVal) { - f.values = append(f.values, v) -} - -// Print some information about the iterator. -func (f *FixedIterator) DebugString(indent int) string { - value := "" - if len(f.values) > 0 { - value = fmt.Sprint(f.values[0]) - } - return fmt.Sprintf("%s(%s tags: %s Size: %d id0: %d)", - strings.Repeat(" ", indent), - f.Type(), - f.FixedTags(), - len(f.values), - value, - ) -} - -// Register this iterator as a Fixed iterator. -func (f *FixedIterator) Type() string { - return "fixed" -} - -// Check if the passed value is equal to one of the values stored in the iterator. -func (f *FixedIterator) Check(v TSVal) bool { - // Could be optimized by keeping it sorted or using a better datastructure. - // However, for fixed iterators, which are by definition kind of tiny, this - // isn't a big issue. - CheckLogIn(f, v) - for _, x := range f.values { - if f.cmp(x, v) { - f.Last = x - return CheckLogOut(f, v, true) - } - } - return CheckLogOut(f, v, false) -} - -// Return the next stored value from the iterator. -func (f *FixedIterator) Next() (TSVal, bool) { - NextLogIn(f) - if f.lastIndex == len(f.values) { - return NextLogOut(f, nil, false) - } - out := f.values[f.lastIndex] - f.Last = out - f.lastIndex++ - return NextLogOut(f, out, true) -} - -// Optimize() for a Fixed iterator is simple. 
Returns a Null iterator if it's empty -// (so that other iterators upstream can treat this as null) or there is no -// optimization. -func (f *FixedIterator) Optimize() (Iterator, bool) { - - if len(f.values) == 1 && f.values[0] == nil { - return &NullIterator{}, true - } - - return f, false -} - -// Size is the number of values stored. -func (f *FixedIterator) Size() (int64, bool) { - return int64(len(f.values)), true -} - -// As we right now have to scan the entire list, Next and Check are linear with the -// size. However, a better data structure could remove these limits. -func (a *FixedIterator) GetStats() *IteratorStats { - return &IteratorStats{ - CheckCost: int64(len(a.values)), - NextCost: int64(len(a.values)), - Size: int64(len(a.values)), - } -} diff --git a/src/graph/hasa-iterator.go b/src/graph/hasa-iterator.go deleted file mode 100644 index 362b96d..0000000 --- a/src/graph/hasa-iterator.go +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines one of the base iterators, the HasA iterator. The HasA takes a -// subiterator of links, and acts as an iterator of nodes in the given -// direction. The name comes from the idea that a "link HasA subject" or a "link -// HasA predicate". -// -// HasA is weird in that it may return the same value twice if on the Next() -// path. 
That's okay -- in reality, it can be viewed as returning the value for -// a new triple, but to make logic much simpler, here we have the HasA. -// -// Likewise, it's important to think about Check()ing a HasA. When given a -// value to check, it means "Check all predicates that have this value for your -// direction against the subiterator." This would imply that there's more than -// one possibility for the same Check()ed value. While we could return the -// number of options, it's simpler to return one, and then call NextResult() -// enough times to enumerate the options. (In fact, one could argue that the -// raison d'etre for NextResult() is this iterator). -// -// Alternatively, can be seen as the dual of the LinksTo iterator. - -import ( - "container/list" - "fmt" - "github.com/barakmich/glog" - "strings" -) - -// A HasaIterator consists of a reference back to the TripleStore that it references, -// a primary subiterator, a direction in which the triples for that subiterator point, -// and a temporary holder for the iterator generated on Check(). -type HasaIterator struct { - BaseIterator - ts TripleStore - primaryIt Iterator - direction string - resultIt Iterator -} - -// Construct a new HasA iterator, given the triple subiterator, and the triple -// direction for which it stands. -func NewHasaIterator(ts TripleStore, subIt Iterator, dir string) *HasaIterator { - var hasa HasaIterator - BaseIteratorInit(&hasa.BaseIterator) - hasa.ts = ts - hasa.primaryIt = subIt - hasa.direction = dir - return &hasa -} - -// Return our sole subiterator, in a list.List. -func (h *HasaIterator) GetSubIterators() *list.List { - l := list.New() - l.PushBack(h.primaryIt) - return l -} - -func (h *HasaIterator) Reset() { - h.primaryIt.Reset() - if h.resultIt != nil { - h.resultIt.Close() - } -} - -func (h *HasaIterator) Clone() Iterator { - out := NewHasaIterator(h.ts, h.primaryIt.Clone(), h.direction) - out.CopyTagsFrom(h) - return out -} - -// Direction accessor. 
-func (h *HasaIterator) Direction() string { return h.direction } - -// Pass the Optimize() call along to the subiterator. If it becomes Null, -// then the HasA becomes Null (there are no triples that have any directions). -func (h *HasaIterator) Optimize() (Iterator, bool) { - - newPrimary, changed := h.primaryIt.Optimize() - if changed { - h.primaryIt = newPrimary - if h.primaryIt.Type() == "null" { - return h.primaryIt, true - } - } - return h, false -} - -// Pass the TagResults down the chain. -func (h *HasaIterator) TagResults(out *map[string]TSVal) { - h.BaseIterator.TagResults(out) - h.primaryIt.TagResults(out) -} - -// DEPRECATED Return results in a ResultTree. -func (h *HasaIterator) GetResultTree() *ResultTree { - tree := NewResultTree(h.LastResult()) - tree.AddSubtree(h.primaryIt.GetResultTree()) - return tree -} - -// Print some information about this iterator. -func (h *HasaIterator) DebugString(indent int) string { - var tags string - for _, k := range h.Tags() { - tags += fmt.Sprintf("%s;", k) - } - return fmt.Sprintf("%s(%s %d tags:%s direction:%s\n%s)", strings.Repeat(" ", indent), h.Type(), h.GetUid(), tags, h.direction, h.primaryIt.DebugString(indent+4)) -} - -// Check a value against our internal iterator. In order to do this, we must first open a new -// iterator of "triples that have `val` in our direction", given to us by the triple store, -// and then Next() values out of that iterator and Check() them against our subiterator. 
-func (h *HasaIterator) Check(val TSVal) bool { - CheckLogIn(h, val) - if glog.V(4) { - glog.V(4).Infoln("Id is", h.ts.GetNameFor(val)) - } - // TODO(barakmich): Optimize this - if h.resultIt != nil { - h.resultIt.Close() - } - h.resultIt = h.ts.GetTripleIterator(h.direction, val) - return CheckLogOut(h, val, h.GetCheckResult()) -} - -// GetCheckResult() is shared code between Check() and GetNextResult() -- calls next on the -// result iterator (a triple iterator based on the last checked value) and returns true if -// another match is made. -func (h *HasaIterator) GetCheckResult() bool { - for { - linkVal, ok := h.resultIt.Next() - if !ok { - break - } - if glog.V(4) { - glog.V(4).Infoln("Triple is", h.ts.GetTriple(linkVal).ToString()) - } - if h.primaryIt.Check(linkVal) { - h.Last = h.ts.GetTripleDirection(linkVal, h.direction) - return true - } - } - return false -} - -// Get the next result that matches this branch. -func (h *HasaIterator) NextResult() bool { - // Order here is important. If the subiterator has a NextResult, then we - // need do nothing -- there is a next result, and we shouldn't move forward. - // However, we then need to get the next result from our last Check(). - // - // The upshot is, the end of NextResult() bubbles up from the bottom of the - // iterator tree up, and we need to respect that. - if h.primaryIt.NextResult() { - return true - } - return h.GetCheckResult() -} - -// Get the next result from this iterator. This is simpler than Check. We have a -// subiterator we can get a value from, and we can take that resultant triple, -// pull our direction out of it, and return that. 
-func (h *HasaIterator) Next() (TSVal, bool) { - NextLogIn(h) - if h.resultIt != nil { - h.resultIt.Close() - } - h.resultIt = &NullIterator{} - - tID, ok := h.primaryIt.Next() - if !ok { - return NextLogOut(h, 0, false) - } - name := h.ts.GetTriple(tID).Get(h.direction) - val := h.ts.GetIdFor(name) - h.Last = val - return NextLogOut(h, val, true) -} - -// GetStats() returns the statistics on the HasA iterator. This is curious. Next -// cost is easy, it's an extra call or so on top of the subiterator Next cost. -// CheckCost involves going to the TripleStore, iterating out values, and hoping -// one sticks -- potentially expensive, depending on fanout. Size, however, is -// potentially smaller. we know at worst it's the size of the subiterator, but -// if there are many repeated values, it could be much smaller in totality. -func (h *HasaIterator) GetStats() *IteratorStats { - subitStats := h.primaryIt.GetStats() - // TODO(barakmich): These should really come from the triplestore itself - // and be optimized. - faninFactor := int64(1) - fanoutFactor := int64(30) - nextConstant := int64(2) - tripleConstant := int64(1) - return &IteratorStats{ - NextCost: tripleConstant + subitStats.NextCost, - CheckCost: (fanoutFactor * nextConstant) * subitStats.CheckCost, - Size: faninFactor * subitStats.Size, - } -} - -// Close the subiterator, the result iterator (if any) and the HasA. -func (h *HasaIterator) Close() { - if h.resultIt != nil { - h.resultIt.Close() - } - h.primaryIt.Close() -} - -// Register this iterator as a HasA. -func (h *HasaIterator) Type() string { return "hasa" } diff --git a/src/graph/iterator.go b/src/graph/iterator.go deleted file mode 100644 index 7aa25bc..0000000 --- a/src/graph/iterator.go +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Define the general iterator interface, as well as the BaseIterator which all -// iterators can "inherit" from to get default iterator functionality. - -import ( - "container/list" - "fmt" - "github.com/barakmich/glog" - "strings" -) - -var iterator_n int = 0 - -type Iterator interface { - // Tags are the way we handle results. By adding a tag to an iterator, we can - // "name" it, in a sense, and at each step of iteration, get a named result. - // TagResults() is therefore the handy way of walking an iterator tree and - // getting the named results. - // - // Tag Accessors. - AddTag(string) - Tags() []string - AddFixedTag(string, TSVal) - FixedTags() map[string]TSVal - CopyTagsFrom(Iterator) - // Fills a tag-to-result-value map. - TagResults(*map[string]TSVal) - // Returns the current result. - LastResult() TSVal - // DEPRECATED -- Fills a ResultTree struct with Result(). - GetResultTree() *ResultTree - - // These methods are the heart and soul of the iterator, as they constitute - // the iteration interface. - // - // To get the full results of iteraton, do the following: - // while (!Next()): - // emit result - // while (!NextResult()): - // emit result - // - // All of them should set iterator.Last to be the last returned value, to - // make results work. - // - // Next() advances the iterator and returns the next valid result. Returns - // (, true) or (nil, false) - Next() (TSVal, bool) - // NextResult() advances iterators that may have more than one valid result, - // from the bottom up. 
- NextResult() bool - // Check(), given a value, returns whether or not that value is within the set - // held by this iterator. - Check(TSVal) bool - // Start iteration from the beginning - Reset() - // Create a new iterator just like this one - Clone() Iterator - // These methods relate to choosing the right iterator, or optimizing an - // iterator tree - // - // GetStats() returns the relative costs of calling the iteration methods for - // this iterator, as well as the size. Roughly, it will take NextCost * Size - // "cost units" to get everything out of the iterator. This is a wibbly-wobbly - // thing, and not exact, but a useful heuristic. - GetStats() *IteratorStats - // Helpful accessor for the number of things in the iterator. The first return - // value is the size, and the second return value is whether that number is exact, - // or a conservative estimate. - Size() (int64, bool) - // Returns a string relating to what the function of the iterator is. By - // knowing the names of the iterators, we can devise optimization strategies. - Type() string - // Optimizes an iterator. Can replace the iterator, or merely move things - // around internally. if it chooses to replace it with a better iterator, - // returns (the new iterator, true), if not, it returns (self, false). - Optimize() (Iterator, bool) - // Return a list of the subiterators for this iterator. - GetSubIterators() *list.List - - // Return a string representation of the iterator, indented by the given amount. - DebugString(int) string - // Return whether this iterator is relaiably nextable. Most iterators are. - // However, some iterators, like "not" are, by definition, the whole database - // except themselves. Next() on these is unproductive, if impossible. - Nextable() bool - // Close the iterator and do internal cleanup. 
- Close() - GetUid() int -} - -type IteratorStats struct { - CheckCost int64 - NextCost int64 - Size int64 -} - -// The Base iterator is the iterator other iterators inherit from to get some -// default functionality. -type BaseIterator struct { - Last TSVal - tags []string - fixedTags map[string]TSVal - nextable bool - uid int -} - -// Called by subclases. -func BaseIteratorInit(b *BaseIterator) { - // Your basic iterator is nextable - b.nextable = true - b.uid = iterator_n - if glog.V(2) { - iterator_n++ - } -} - -func (b *BaseIterator) GetUid() int { - return b.uid -} - -// Adds a tag to the iterator. Most iterators don't need to override. -func (b *BaseIterator) AddTag(tag string) { - if b.tags == nil { - b.tags = make([]string, 0) - } - b.tags = append(b.tags, tag) -} - -func (b *BaseIterator) AddFixedTag(tag string, value TSVal) { - if b.fixedTags == nil { - b.fixedTags = make(map[string]TSVal) - } - b.fixedTags[tag] = value -} - -// Returns the tags. -func (b *BaseIterator) Tags() []string { - return b.tags -} - -func (b *BaseIterator) FixedTags() map[string]TSVal { - return b.fixedTags -} - -func (b *BaseIterator) CopyTagsFrom(other_it Iterator) { - for _, tag := range other_it.Tags() { - b.AddTag(tag) - } - - for k, v := range other_it.FixedTags() { - b.AddFixedTag(k, v) - } - -} - -// Prints a silly debug string. Most classes override. -func (n *BaseIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(base)", strings.Repeat(" ", indent)) -} - -// Nothing in a base iterator. -func (n *BaseIterator) Check(v TSVal) bool { - return false -} - -// Base iterators should never appear in a tree if they are, select against -// them. -func (n *BaseIterator) GetStats() *IteratorStats { - return &IteratorStats{100000, 100000, 100000} -} - -// DEPRECATED -func (b *BaseIterator) GetResultTree() *ResultTree { - tree := NewResultTree(b.LastResult()) - return tree -} - -// Nothing in a base iterator. 
-func (n *BaseIterator) Next() (TSVal, bool) { - return nil, false -} - -func (n *BaseIterator) NextResult() bool { - return false -} - -// Returns the last result of an iterator. -func (n *BaseIterator) LastResult() TSVal { - return n.Last -} - -// If you're empty and you know it, clap your hands. -func (n *BaseIterator) Size() (int64, bool) { - return 0, true -} - -// No subiterators. Only those with subiterators need to do anything here. -func (n *BaseIterator) GetSubIterators() *list.List { - return nil -} - -// Accessor -func (b *BaseIterator) Nextable() bool { return b.nextable } - -// Fill the map based on the tags assigned to this iterator. Default -// functionality works well for most iterators. -func (a *BaseIterator) TagResults(out_map *map[string]TSVal) { - for _, tag := range a.Tags() { - (*out_map)[tag] = a.LastResult() - } - - for tag, value := range a.FixedTags() { - (*out_map)[tag] = value - } -} - -// Nothing to clean up. -//func (a *BaseIterator) Close() {} -func (a *NullIterator) Close() {} - -func (a *BaseIterator) Reset() {} - -// Here we define the simplest base iterator -- the Null iterator. It contains nothing. -// It is the empty set. Often times, queries that contain one of these match nothing, -// so it's important to give it a special iterator. -type NullIterator struct { - BaseIterator -} - -// Fairly useless New function. -func NewNullIterator() *NullIterator { - var n NullIterator - return &n -} - -func (n *NullIterator) Clone() Iterator { return NewNullIterator() } - -// Name the null iterator. -func (n *NullIterator) Type() string { return "null" } - -// A good iterator will close itself when it returns true. -// Null has nothing it needs to do. -func (n *NullIterator) Optimize() (Iterator, bool) { return n, false } - -// Print the null iterator. -func (n *NullIterator) DebugString(indent int) string { - return strings.Repeat(" ", indent) + "(null)" -} - -// A null iterator costs nothing. Use it! 
-func (n *NullIterator) GetStats() *IteratorStats { - return &IteratorStats{0, 0, 0} -} - -// Utility logging functions for when an iterator gets called Next upon, or Check upon, as -// well as what they return. Highly useful for tracing the execution path of a query. -func CheckLogIn(it Iterator, val TSVal) { - if glog.V(4) { - glog.V(4).Infof("%s %d CHECK %d", strings.ToUpper(it.Type()), it.GetUid(), val) - } -} - -func CheckLogOut(it Iterator, val TSVal, good bool) bool { - if glog.V(4) { - if good { - glog.V(4).Infof("%s %d CHECK %d GOOD", strings.ToUpper(it.Type()), it.GetUid(), val) - } else { - glog.V(4).Infof("%s %d CHECK %d BAD", strings.ToUpper(it.Type()), it.GetUid(), val) - } - } - return good -} - -func NextLogIn(it Iterator) { - if glog.V(4) { - glog.V(4).Infof("%s %d NEXT", strings.ToUpper(it.Type()), it.GetUid()) - } -} - -func NextLogOut(it Iterator, val TSVal, ok bool) (TSVal, bool) { - if glog.V(4) { - if ok { - glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type()), it.GetUid(), val) - } else { - glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type()), it.GetUid()) - } - } - return val, ok -} diff --git a/src/graph/linksto-iterator.go b/src/graph/linksto-iterator.go deleted file mode 100644 index 45c5e2f..0000000 --- a/src/graph/linksto-iterator.go +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph - -// Defines one of the base iterators, the LinksTo iterator. A LinksTo takes a -// subiterator of nodes, and contains an iteration of links which "link to" -// those nodes in a given direction. -// -// Next()ing a LinksTo is straightforward -- iterate through all links to // -// things in the subiterator, and then advance the subiterator, and do it again. -// LinksTo is therefore sensitive to growing with a fanout. (A small-sized -// subiterator could cause LinksTo to be large). -// -// Check()ing a LinksTo means, given a link, take the direction we care about -// and check if it's in our subiterator. Checking is therefore fairly cheap, and -// similar to checking the subiterator alone. -// -// Can be seen as the dual of the HasA iterator. - -import ( - "container/list" - "fmt" - "strings" -) - -// A LinksTo has a reference back to the TripleStore (to create the iterators -// for each node) the subiterator, and the direction the iterator comes from. -// `next_it` is the tempoarary iterator held per result in `primary_it`. -type LinksToIterator struct { - BaseIterator - ts TripleStore - primaryIt Iterator - direction string - nextIt Iterator -} - -// Construct a new LinksTo iterator around a direction and a subiterator of -// nodes. -func NewLinksToIterator(ts TripleStore, it Iterator, dir string) *LinksToIterator { - var lto LinksToIterator - BaseIteratorInit(<o.BaseIterator) - lto.ts = ts - lto.primaryIt = it - lto.direction = dir - lto.nextIt = &NullIterator{} - return <o -} - -func (l *LinksToIterator) Reset() { - l.primaryIt.Reset() - if l.nextIt != nil { - l.nextIt.Close() - } - l.nextIt = &NullIterator{} -} - -func (l *LinksToIterator) Clone() Iterator { - out := NewLinksToIterator(l.ts, l.primaryIt.Clone(), l.direction) - out.CopyTagsFrom(l) - return out -} - -// Return the direction under consideration. -func (l *LinksToIterator) Direction() string { return l.direction } - -// Tag these results, and our subiterator's results. 
-func (l *LinksToIterator) TagResults(out *map[string]TSVal) { - l.BaseIterator.TagResults(out) - l.primaryIt.TagResults(out) -} - -// DEPRECATED -func (l *LinksToIterator) GetResultTree() *ResultTree { - tree := NewResultTree(l.LastResult()) - tree.AddSubtree(l.primaryIt.GetResultTree()) - return tree -} - -// Print the iterator. -func (l *LinksToIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s %d direction:%s\n%s)", - strings.Repeat(" ", indent), - l.Type(), l.GetUid(), l.direction, l.primaryIt.DebugString(indent+4)) -} - -// If it checks in the right direction for the subiterator, it is a valid link -// for the LinksTo. -func (l *LinksToIterator) Check(val TSVal) bool { - CheckLogIn(l, val) - node := l.ts.GetTripleDirection(val, l.direction) - if l.primaryIt.Check(node) { - l.Last = val - return CheckLogOut(l, val, true) - } - return CheckLogOut(l, val, false) -} - -// Return a list containing only our subiterator. -func (lto *LinksToIterator) GetSubIterators() *list.List { - l := list.New() - l.PushBack(lto.primaryIt) - return l -} - -// Optimize the LinksTo, by replacing it if it can be. -func (lto *LinksToIterator) Optimize() (Iterator, bool) { - newPrimary, changed := lto.primaryIt.Optimize() - if changed { - lto.primaryIt = newPrimary - if lto.primaryIt.Type() == "null" { - lto.nextIt.Close() - return lto.primaryIt, true - } - } - // Ask the TripleStore if we can be replaced. Often times, this is a great - // optimization opportunity (there's a fixed iterator underneath us, for - // example). - newReplacement, hasOne := lto.ts.OptimizeIterator(lto) - if hasOne { - lto.Close() - return newReplacement, true - } - return lto, false -} - -// Next()ing a LinksTo operates as described above. 
-func (l *LinksToIterator) Next() (TSVal, bool) { - NextLogIn(l) - val, ok := l.nextIt.Next() - if !ok { - // Subiterator is empty, get another one - candidate, ok := l.primaryIt.Next() - if !ok { - // We're out of nodes in our subiterator, so we're done as well. - return NextLogOut(l, 0, false) - } - l.nextIt.Close() - l.nextIt = l.ts.GetTripleIterator(l.direction, candidate) - // Recurse -- return the first in the next set. - return l.Next() - } - l.Last = val - return NextLogOut(l, val, ok) -} - -// Close our subiterators. -func (l *LinksToIterator) Close() { - l.nextIt.Close() - l.primaryIt.Close() -} - -// We won't ever have a new result, but our subiterators might. -func (l *LinksToIterator) NextResult() bool { - return l.primaryIt.NextResult() -} - -// Register the LinksTo. -func (l *LinksToIterator) Type() string { return "linksto" } - -// Return a guess as to how big or costly it is to next the iterator. -func (l *LinksToIterator) GetStats() *IteratorStats { - subitStats := l.primaryIt.GetStats() - // TODO(barakmich): These should really come from the triplestore itself - fanoutFactor := int64(20) - checkConstant := int64(1) - nextConstant := int64(2) - return &IteratorStats{ - NextCost: nextConstant + subitStats.NextCost, - CheckCost: checkConstant + subitStats.CheckCost, - Size: fanoutFactor * subitStats.Size, - } -} diff --git a/src/graph/linksto-iterator_test.go b/src/graph/linksto-iterator_test.go deleted file mode 100644 index 06cdd2a..0000000 --- a/src/graph/linksto-iterator_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "testing" -) - -func TestLinksTo(t *testing.T) { - ts := new(TestTripleStore) - tsFixed := newFixedIterator() - tsFixed.AddValue(2) - ts.On("GetIdFor", "cool").Return(1) - ts.On("GetTripleIterator", "o", 1).Return(tsFixed) - fixed := newFixedIterator() - fixed.AddValue(ts.GetIdFor("cool")) - lto := NewLinksToIterator(ts, fixed, "o") - val, ok := lto.Next() - if !ok { - t.Error("At least one triple matches the fixed object") - } - if val != 2 { - t.Errorf("Triple index 2, such as %s, should match %s", ts.GetTriple(2), ts.GetTriple(val)) - } -} diff --git a/src/graph/mock_ts.go b/src/graph/mock_ts.go deleted file mode 100644 index fcd9244..0000000 --- a/src/graph/mock_ts.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// A quickly mocked version of the TripleStore interface, for use in tests. -// Can better used Mock.Called but will fill in as needed. 
- -import ( - "github.com/stretchrcom/testify/mock" -) - -type TestTripleStore struct { - mock.Mock -} - -func (ts *TestTripleStore) GetIdFor(s string) TSVal { - args := ts.Mock.Called(s) - return args.Get(0) -} -func (ts *TestTripleStore) AddTriple(*Triple) {} -func (ts *TestTripleStore) AddTripleSet([]*Triple) {} -func (ts *TestTripleStore) GetTriple(TSVal) *Triple { return &Triple{} } -func (ts *TestTripleStore) GetTripleIterator(s string, i TSVal) Iterator { - args := ts.Mock.Called(s, i) - return args.Get(0).(Iterator) -} -func (ts *TestTripleStore) GetNodesAllIterator() Iterator { return &NullIterator{} } -func (ts *TestTripleStore) GetTriplesAllIterator() Iterator { return &NullIterator{} } -func (ts *TestTripleStore) GetIteratorByString(string, string, string) Iterator { - return &NullIterator{} -} -func (ts *TestTripleStore) GetNameFor(v TSVal) string { - args := ts.Mock.Called(v) - return args.Get(0).(string) -} -func (ts *TestTripleStore) Size() int64 { return 0 } -func (ts *TestTripleStore) DebugPrint() {} -func (ts *TestTripleStore) OptimizeIterator(it Iterator) (Iterator, bool) { - return &NullIterator{}, false -} -func (ts *TestTripleStore) MakeFixed() *FixedIterator { - return NewFixedIteratorWithCompare(BasicEquality) -} -func (ts *TestTripleStore) Close() {} -func (ts *TestTripleStore) GetTripleDirection(TSVal, string) TSVal { return 0 } -func (ts *TestTripleStore) RemoveTriple(t *Triple) {} diff --git a/src/graph/optional-iterator.go b/src/graph/optional-iterator.go deleted file mode 100644 index 8050ba8..0000000 --- a/src/graph/optional-iterator.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// "Optional" is kind of odd. It's not an iterator in the strictest sense, but -// it's easier to implement as an iterator. -// -// Consider what it means. It means that we have a subconstraint which we do -// not want to constrain the query -- we just want it to return the matching -// subgraph if one matches at all. By analogy to regular expressions, it is the -// '?' operator. -// -// If it were a proper iterator of its own (and indeed, a reasonable refactor -// of this iterator would be to make it such) it would contain an all iterator -// -- all things in the graph. It matches everything (as does the regex "(a)?") - -import ( - "fmt" - "github.com/barakmich/glog" - "strings" -) - -// An optional iterator has the subconstraint iterator we wish to be optional -// and whether the last check we received was true or false. -type OptionalIterator struct { - BaseIterator - subIt Iterator - lastCheck bool -} - -// Creates a new optional iterator. -func NewOptionalIterator(it Iterator) *OptionalIterator { - var o OptionalIterator - BaseIteratorInit(&o.BaseIterator) - o.nextable = false - o.subIt = it - return &o -} - -func (o *OptionalIterator) Reset() { - o.subIt.Reset() - o.lastCheck = false -} - -func (o *OptionalIterator) Close() { - o.subIt.Close() -} - -func (o *OptionalIterator) Clone() Iterator { - out := NewOptionalIterator(o.subIt.Clone()) - out.CopyTagsFrom(o) - return out -} - -// Nexting the iterator is unsupported -- error and return an empty set. 
-// (As above, a reasonable alternative would be to Next() an all iterator) -func (o *OptionalIterator) Next() (TSVal, bool) { - glog.Errorln("Nexting an un-nextable iterator") - return nil, false -} - -// An optional iterator only has a next result if, (a) last time we checked -// we had any results whatsoever, and (b) there was another subresult in our -// optional subbranch. -func (o *OptionalIterator) NextResult() bool { - if o.lastCheck { - return o.subIt.NextResult() - } - return false -} - -// Check() is the real hack of this iterator. It always returns true, regardless -// of whether the subiterator matched. But we keep track of whether the subiterator -// matched for results purposes. -func (o *OptionalIterator) Check(val TSVal) bool { - checked := o.subIt.Check(val) - o.lastCheck = checked - o.Last = val - return true -} - -// If we failed the check, then the subiterator should not contribute to the result -// set. Otherwise, go ahead and tag it. -func (o *OptionalIterator) TagResults(out *map[string]TSVal) { - if o.lastCheck == false { - return - } - o.subIt.TagResults(out) -} - -// Registers the optional iterator. -func (o *OptionalIterator) Type() string { return "optional" } - -// Prints the optional and it's subiterator. -func (o *OptionalIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s tags:%s\n%s)", - strings.Repeat(" ", indent), - o.Type(), - o.Tags(), - o.subIt.DebugString(indent+4)) -} - -// There's nothing to optimize for an optional. Optimize the subiterator and -// potentially replace it. -func (o *OptionalIterator) Optimize() (Iterator, bool) { - newSub, changed := o.subIt.Optimize() - if changed { - o.subIt.Close() - o.subIt = newSub - } - return o, false -} - -// We're only as expensive as our subiterator. Except, we can't be nexted. 
-func (o *OptionalIterator) GetStats() *IteratorStats { - subStats := o.subIt.GetStats() - return &IteratorStats{ - CheckCost: subStats.CheckCost, - NextCost: int64(1 << 62), - Size: subStats.Size, - } -} diff --git a/src/graph/or-iterator.go b/src/graph/or-iterator.go deleted file mode 100644 index a0de623..0000000 --- a/src/graph/or-iterator.go +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines the or and short-circuiting or iterator. Or is the union operator for it's subiterators. -// Short-circuiting-or is a little different. It will return values from the first iterator that returns -// values at all, and then stops. -// -// Never reorders the iterators from the order they arrive. It is either the union or the first one. -// May return the same value twice -- once for each branch. 
- -import ( - "container/list" - "fmt" - "strings" -) - -type OrIterator struct { - BaseIterator - isShortCircuiting bool - internalIterators []Iterator - itCount int - currentIterator int -} - -func NewOrIterator() *OrIterator { - var or OrIterator - BaseIteratorInit(&or.BaseIterator) - or.internalIterators = make([]Iterator, 0, 20) - or.isShortCircuiting = false - or.currentIterator = -1 - return &or -} - -func NewShortCircuitOrIterator() *OrIterator { - var or OrIterator - BaseIteratorInit(&or.BaseIterator) - or.internalIterators = make([]Iterator, 0, 20) - or.isShortCircuiting = true - or.currentIterator = -1 - return &or -} - -// Reset all internal iterators -func (or *OrIterator) Reset() { - for _, it := range or.internalIterators { - it.Reset() - } - or.currentIterator = -1 -} - -func (or *OrIterator) Clone() Iterator { - var newOr *OrIterator - if or.isShortCircuiting { - newOr = NewShortCircuitOrIterator() - } else { - newOr = NewOrIterator() - } - for _, it := range or.internalIterators { - newOr.AddSubIterator(it.Clone()) - } - or.CopyTagsFrom(or) - return newOr -} - -// Returns a list.List of the subiterators, in order. -func (or *OrIterator) GetSubIterators() *list.List { - l := list.New() - for _, it := range or.internalIterators { - l.PushBack(it) - } - return l -} - -// Overrides BaseIterator TagResults, as it needs to add it's own results and -// recurse down it's subiterators. -func (or *OrIterator) TagResults(out *map[string]TSVal) { - or.BaseIterator.TagResults(out) - or.internalIterators[or.currentIterator].TagResults(out) -} - -// DEPRECATED Returns the ResultTree for this iterator, recurses to it's subiterators. -func (or *OrIterator) GetResultTree() *ResultTree { - tree := NewResultTree(or.LastResult()) - for _, it := range or.internalIterators { - tree.AddSubtree(it.GetResultTree()) - } - return tree -} - -// Prints information about this iterator. 
-func (or *OrIterator) DebugString(indent int) string { - var total string - for i, it := range or.internalIterators { - total += strings.Repeat(" ", indent+2) - total += fmt.Sprintf("%d:\n%s\n", i, it.DebugString(indent+4)) - } - var tags string - for _, k := range or.Tags() { - tags += fmt.Sprintf("%s;", k) - } - spaces := strings.Repeat(" ", indent+2) - - return fmt.Sprintf("%s(%s\n%stags:%s\n%sits:\n%s)", - strings.Repeat(" ", indent), - or.Type(), - spaces, - tags, - spaces, - total) -} - -// Add a subiterator to this Or iterator. Order matters. -func (or *OrIterator) AddSubIterator(sub Iterator) { - or.internalIterators = append(or.internalIterators, sub) - or.itCount++ -} - -// Returns the Next value from the Or iterator. Because the Or is the -// union of its subiterators, it must produce from all subiterators -- unless -// it's shortcircuiting, in which case, it's the first one that returns anything. -func (or *OrIterator) Next() (TSVal, bool) { - NextLogIn(or) - var curr TSVal - var exists bool - firstTime := false - for { - if or.currentIterator == -1 { - or.currentIterator = 0 - firstTime = true - } - curIt := or.internalIterators[or.currentIterator] - curr, exists = curIt.Next() - if !exists { - if or.isShortCircuiting && !firstTime { - return NextLogOut(or, nil, false) - } - or.currentIterator++ - if or.currentIterator == or.itCount { - return NextLogOut(or, nil, false) - } - } else { - or.Last = curr - return NextLogOut(or, curr, true) - } - } - panic("Somehow broke out of Next() loop in OrIterator") -} - -// Checks a value against the iterators, in order. -func (or *OrIterator) checkSubIts(val TSVal) bool { - var subIsGood = false - for i, it := range or.internalIterators { - subIsGood = it.Check(val) - if subIsGood { - or.currentIterator = i - break - } - } - return subIsGood -} - -// Check a value against the entire iterator, in order. 
-func (or *OrIterator) Check(val TSVal) bool { - CheckLogIn(or, val) - anyGood := or.checkSubIts(val) - if !anyGood { - return CheckLogOut(or, val, false) - } - or.Last = val - return CheckLogOut(or, val, true) -} - -// Returns the approximate size of the Or iterator. Because we're dealing -// with a union, we know that the largest we can be is the sum of all the iterators, -// or in the case of short-circuiting, the longest. -func (or *OrIterator) Size() (int64, bool) { - var val int64 - var b bool - if or.isShortCircuiting { - val = 0 - b = true - for _, it := range or.internalIterators { - newval, newb := it.Size() - if val < newval { - val = newval - } - b = newb && b - } - } else { - val = 0 - b = true - for _, it := range or.internalIterators { - newval, newb := it.Size() - val += newval - b = newb && b - } - } - return val, b -} - -// An Or has no NextResult of its own -- that is, there are no other values -// which satisfy our previous result that are not the result itself. Our -// subiterators might, however, so just pass the call recursively. In the case of -// shortcircuiting, only allow new results from the currently checked iterator -func (or *OrIterator) NextResult() bool { - if or.currentIterator != -1 { - return or.internalIterators[or.currentIterator].NextResult() - } - return false -} - -// Perform or-specific cleanup, of which there currently is none. -func (or *OrIterator) cleanUp() {} - -// Close this iterator, and, by extension, close the subiterators. -// Close should be idempotent, and it follows that if it's subiterators -// follow this contract, the And follows the contract. 
-func (or *OrIterator) Close() { - or.cleanUp() - for _, it := range or.internalIterators { - it.Close() - } -} - -func (or *OrIterator) Optimize() (Iterator, bool) { - oldItList := or.GetSubIterators() - itList := optimizeSubIterators(oldItList) - // Close the replaced iterators (they ought to close themselves, but Close() - // is idempotent, so this just protects against any machinations). - closeIteratorList(oldItList, nil) - newOr := NewOrIterator() - newOr.isShortCircuiting = or.isShortCircuiting - - // Add the subiterators in order. - for e := itList.Front(); e != nil; e = e.Next() { - newOr.AddSubIterator(e.Value.(Iterator)) - } - - // Move the tags hanging on us (like any good replacement). - newOr.CopyTagsFrom(or) - - // And close ourselves but not our subiterators -- some may still be alive in - // the new And (they were unchanged upon calling Optimize() on them, at the - // start). - or.cleanUp() - return newOr, true -} - -func (or *OrIterator) GetStats() *IteratorStats { - CheckCost := int64(0) - NextCost := int64(0) - Size := int64(0) - for _, it := range or.internalIterators { - stats := it.GetStats() - NextCost += stats.NextCost - CheckCost += stats.CheckCost - if or.isShortCircuiting { - if Size < stats.Size { - Size = stats.Size - } - } else { - Size += stats.Size - } - } - return &IteratorStats{ - CheckCost: CheckCost, - NextCost: NextCost, - Size: Size, - } - -} - -// Register this as an "or" iterator. -func (or *OrIterator) Type() string { return "or" } diff --git a/src/graph/or-iterator_test.go b/src/graph/or-iterator_test.go deleted file mode 100644 index 9450094..0000000 --- a/src/graph/or-iterator_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - . "github.com/smartystreets/goconvey/convey" - "testing" -) - -func extractNumbersFromIterator(it Iterator) []int { - var outputNumbers []int - for { - val, ok := it.Next() - if !ok { - break - } - outputNumbers = append(outputNumbers, val.(int)) - } - return outputNumbers -} - -func TestOrIteratorBasics(t *testing.T) { - var orIt *OrIterator - - Convey("Given an Or Iterator of two fixed iterators", t, func() { - orIt = NewOrIterator() - fixed1 := newFixedIterator() - fixed1.AddValue(1) - fixed1.AddValue(2) - fixed1.AddValue(3) - fixed2 := newFixedIterator() - fixed2.AddValue(3) - fixed2.AddValue(9) - fixed2.AddValue(20) - fixed2.AddValue(21) - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - - Convey("It should guess its size.", func() { - v, _ := orIt.Size() - So(v, ShouldEqual, 7) - }) - - Convey("It should extract all the numbers, potentially twice.", func() { - allNumbers := []int{1, 2, 3, 3, 9, 20, 21} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - Convey("It should check that numbers in either iterator exist.", func() { - So(orIt.Check(2), ShouldEqual, true) - So(orIt.Check(3), ShouldEqual, true) - So(orIt.Check(21), ShouldEqual, true) - }) - - Convey("It should check that numbers not in either iterator are false.", func() { - So(orIt.Check(22), ShouldEqual, false) - 
So(orIt.Check(5), ShouldEqual, false) - So(orIt.Check(0), ShouldEqual, false) - }) - - }) - -} - -func TestShortCircuitingOrBasics(t *testing.T) { - var orIt *OrIterator - - Convey("Given a short-circuiting Or of two fixed iterators", t, func() { - orIt = NewShortCircuitOrIterator() - fixed1 := newFixedIterator() - fixed1.AddValue(1) - fixed1.AddValue(2) - fixed1.AddValue(3) - fixed2 := newFixedIterator() - fixed2.AddValue(3) - fixed2.AddValue(9) - fixed2.AddValue(20) - fixed2.AddValue(21) - - Convey("It should guess its size.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - v, _ := orIt.Size() - So(v, ShouldEqual, 4) - }) - - Convey("It should extract the first iterators' numbers.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - allNumbers := []int{1, 2, 3} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - Convey("It should check that numbers in either iterator exist.", func() { - orIt.AddSubIterator(fixed1) - orIt.AddSubIterator(fixed2) - So(orIt.Check(2), ShouldEqual, true) - So(orIt.Check(3), ShouldEqual, true) - So(orIt.Check(21), ShouldEqual, true) - So(orIt.Check(22), ShouldEqual, false) - So(orIt.Check(5), ShouldEqual, false) - So(orIt.Check(0), ShouldEqual, false) - - }) - - Convey("It should check that it pulls the second iterator's numbers if the first is empty.", func() { - orIt.AddSubIterator(newFixedIterator()) - orIt.AddSubIterator(fixed2) - allNumbers := []int{3, 9, 20, 21} - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - orIt.Reset() - So(extractNumbersFromIterator(orIt), ShouldResemble, allNumbers) - // Optimization works - newOr, _ := orIt.Optimize() - So(extractNumbersFromIterator(newOr), ShouldResemble, allNumbers) - }) - - }) - -} diff --git 
a/src/graph/query-shape.go b/src/graph/query-shape.go deleted file mode 100644 index dece079..0000000 --- a/src/graph/query-shape.go +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -type Node struct { - Id int `json:"id"` - Tags []string `json:"tags,omitempty"` - Values []string `json:"values,omitempty"` - IsLinkNode bool `json:"is_link_node"` - IsFixed bool `json:"is_fixed"` -} - -type Link struct { - Source int `json:"source"` - Target int `json:"target"` - Pred int `json:"type"` - LinkNode int `json:"link_node"` -} - -type queryShape struct { - nodes []Node - links []Link - ts TripleStore - nodeId int - hasaIds []int - hasaDirs []string -} - -func OutputQueryShapeForIterator(it Iterator, ts TripleStore, outputMap *map[string]interface{}) { - qs := &queryShape{} - qs.nodes = make([]Node, 0) - qs.links = make([]Link, 0) - qs.hasaIds = make([]int, 0) - qs.hasaDirs = make([]string, 0) - qs.ts = ts - qs.nodeId = 1 - - node := qs.MakeNode(it.Clone()) - qs.AddNode(node) - (*outputMap)["nodes"] = qs.nodes - (*outputMap)["links"] = qs.links -} - -func (qs *queryShape) AddNode(n *Node) { - qs.nodes = append(qs.nodes, *n) -} - -func (qs *queryShape) AddLink(l *Link) { - qs.links = append(qs.links, *l) -} - -func (qs *queryShape) LastHasa() (int, string) { - return qs.hasaIds[len(qs.hasaIds)-1], qs.hasaDirs[len(qs.hasaDirs)-1] -} - -func (qs *queryShape) 
PushHasa(i int, s string) { - qs.hasaIds = append(qs.hasaIds, i) - qs.hasaDirs = append(qs.hasaDirs, s) -} - -func (qs *queryShape) RemoveHasa() { - qs.hasaIds = qs.hasaIds[:len(qs.hasaIds)-1] - qs.hasaDirs = qs.hasaDirs[:len(qs.hasaDirs)-1] -} - -func (qs *queryShape) StealNode(left *Node, right *Node) { - for _, v := range right.Values { - left.Values = append(left.Values, v) - } - for _, v := range right.Tags { - left.Tags = append(left.Tags, v) - } - left.IsLinkNode = left.IsLinkNode || right.IsLinkNode - left.IsFixed = left.IsFixed || right.IsFixed - for i, link := range qs.links { - rewrite := false - if link.LinkNode == right.Id { - link.LinkNode = left.Id - rewrite = true - } - if link.Source == right.Id { - link.Source = left.Id - rewrite = true - } - if link.Target == right.Id { - link.Target = left.Id - rewrite = true - } - if rewrite { - qs.links = append(append(qs.links[:i], qs.links[i+1:]...), link) - } - } -} - -func (qs *queryShape) MakeNode(it Iterator) *Node { - var n Node - n.IsLinkNode = false - n.IsFixed = false - n.Id = qs.nodeId - n.Tags = make([]string, 0) - n.Values = make([]string, 0) - for _, tag := range it.Tags() { - n.Tags = append(n.Tags, tag) - } - for k, _ := range it.FixedTags() { - n.Tags = append(n.Tags, k) - } - - switch it.Type() { - case "and": - list := it.GetSubIterators() - for e := list.Front(); e != nil; e = e.Next() { - subit := e.Value.(Iterator) - qs.nodeId++ - newNode := qs.MakeNode(subit) - if subit.Type() != "or" { - qs.StealNode(&n, newNode) - } else { - qs.AddNode(newNode) - qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) - } - } - case "fixed": - n.IsFixed = true - for { - val, more := it.Next() - if !more { - break - } - n.Values = append(n.Values, qs.ts.GetNameFor(val)) - } - case "hasa": - hasa := it.(*HasaIterator) - qs.PushHasa(n.Id, hasa.direction) - qs.nodeId++ - newNode := qs.MakeNode(hasa.primaryIt) - qs.AddNode(newNode) - qs.RemoveHasa() - case "or": - list := it.GetSubIterators() - for e := list.Front(); e 
!= nil; e = e.Next() { - subit := e.Value.(Iterator) - qs.nodeId++ - newNode := qs.MakeNode(subit) - if subit.Type() == "or" { - qs.StealNode(&n, newNode) - } else { - qs.AddNode(newNode) - qs.AddLink(&Link{n.Id, newNode.Id, 0, 0}) - } - } - case "linksto": - n.IsLinkNode = true - lto := it.(*LinksToIterator) - qs.nodeId++ - newNode := qs.MakeNode(lto.primaryIt) - hasaID, hasaDir := qs.LastHasa() - if (hasaDir == "s" && lto.direction == "o") || - (hasaDir == "o" && lto.direction == "s") { - qs.AddNode(newNode) - if hasaDir == "s" { - qs.AddLink(&Link{hasaID, newNode.Id, 0, n.Id}) - } else { - qs.AddLink(&Link{newNode.Id, hasaID, 0, n.Id}) - } - } else if lto.primaryIt.Type() == "fixed" { - qs.StealNode(&n, newNode) - } else { - qs.AddNode(newNode) - } - case "optional": - // Unsupported, for the moment - fallthrough - case "all": - } - return &n -} diff --git a/src/graph/query-shape_test.go b/src/graph/query-shape_test.go deleted file mode 100644 index b0d6950..0000000 --- a/src/graph/query-shape_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - . 
"github.com/smartystreets/goconvey/convey" - "testing" -) - -func buildHasaWithTag(ts TripleStore, tag string, target string) *HasaIterator { - fixed_obj := ts.MakeFixed() - fixed_pred := ts.MakeFixed() - fixed_obj.AddValue(ts.GetIdFor(target)) - fixed_pred.AddValue(ts.GetIdFor("status")) - fixed_obj.AddTag(tag) - lto1 := NewLinksToIterator(ts, fixed_obj, "o") - lto2 := NewLinksToIterator(ts, fixed_pred, "p") - and := NewAndIterator() - and.AddSubIterator(lto1) - and.AddSubIterator(lto2) - hasa := NewHasaIterator(ts, and, "s") - return hasa -} - -func TestQueryShape(t *testing.T) { - var queryShape map[string]interface{} - var ts *TestTripleStore - ts = new(TestTripleStore) - ts.On("GetIdFor", "cool").Return(1) - ts.On("GetNameFor", 1).Return("cool") - ts.On("GetIdFor", "status").Return(2) - ts.On("GetNameFor", 2).Return("status") - ts.On("GetIdFor", "fun").Return(3) - ts.On("GetNameFor", 3).Return("fun") - ts.On("GetIdFor", "name").Return(4) - ts.On("GetNameFor", 4).Return("name") - - Convey("Given a single linkage iterator's shape", t, func() { - queryShape = make(map[string]interface{}) - hasa := buildHasaWithTag(ts, "tag", "cool") - hasa.AddTag("top") - OutputQueryShapeForIterator(hasa, ts, &queryShape) - - Convey("It should have three nodes and one link", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(len(nodes), ShouldEqual, 3) - So(len(links), ShouldEqual, 1) - }) - - Convey("These nodes should be correctly tagged", func() { - nodes := queryShape["nodes"].([]Node) - So(nodes[0].Tags, ShouldResemble, []string{"tag"}) - So(nodes[1].IsLinkNode, ShouldEqual, true) - So(nodes[2].Tags, ShouldResemble, []string{"top"}) - - }) - - Convey("The link should be correctly typed", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(links[0].Source, ShouldEqual, nodes[2].Id) - So(links[0].Target, ShouldEqual, nodes[0].Id) - So(links[0].LinkNode, ShouldEqual, nodes[1].Id) - 
So(links[0].Pred, ShouldEqual, 0) - - }) - - }) - - Convey("Given a name-of-an-and-iterator's shape", t, func() { - queryShape = make(map[string]interface{}) - hasa1 := buildHasaWithTag(ts, "tag1", "cool") - hasa1.AddTag("hasa1") - hasa2 := buildHasaWithTag(ts, "tag2", "fun") - hasa1.AddTag("hasa2") - andInternal := NewAndIterator() - andInternal.AddSubIterator(hasa1) - andInternal.AddSubIterator(hasa2) - fixed_pred := ts.MakeFixed() - fixed_pred.AddValue(ts.GetIdFor("name")) - lto1 := NewLinksToIterator(ts, andInternal, "s") - lto2 := NewLinksToIterator(ts, fixed_pred, "p") - and := NewAndIterator() - and.AddSubIterator(lto1) - and.AddSubIterator(lto2) - hasa := NewHasaIterator(ts, and, "o") - OutputQueryShapeForIterator(hasa, ts, &queryShape) - - Convey("It should have seven nodes and three links", func() { - nodes := queryShape["nodes"].([]Node) - links := queryShape["links"].([]Link) - So(len(nodes), ShouldEqual, 7) - So(len(links), ShouldEqual, 3) - }) - - Convey("Three of the nodes are link nodes, four aren't", func() { - nodes := queryShape["nodes"].([]Node) - count := 0 - for _, node := range nodes { - if node.IsLinkNode { - count++ - } - } - So(count, ShouldEqual, 3) - }) - - Convey("These nodes should be correctly tagged", nil) - - }) - -} diff --git a/src/graph/result-tree-evaluator.go b/src/graph/result-tree-evaluator.go deleted file mode 100644 index e75cf56..0000000 --- a/src/graph/result-tree-evaluator.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -import ( - "container/list" - "fmt" -) - -type ResultTree struct { - result TSVal - subtrees *list.List -} - -func NewResultTree(result TSVal) *ResultTree { - var tree ResultTree - tree.subtrees = list.New() - tree.result = result - return &tree -} - -func (tree *ResultTree) ToString() string { - base := fmt.Sprintf("(%d", tree.result) - if tree.subtrees.Len() != 0 { - for e := tree.subtrees.Front(); e != nil; e = e.Next() { - base += fmt.Sprintf(" %s", (e.Value.(*ResultTree)).ToString()) - } - } - base += ")" - return base -} - -func (tree *ResultTree) AddSubtree(sub *ResultTree) { - tree.subtrees.PushBack(sub) -} - -func StringResultTreeEvaluator(it Iterator) string { - ok := true - out := "" - for { - _, ok = it.Next() - if !ok { - break - } - out += it.GetResultTree().ToString() - out += "\n" - for it.NextResult() == true { - out += " " - out += it.GetResultTree().ToString() - out += "\n" - } - } - return out -} - -func PrintResultTreeEvaluator(it Iterator) { - fmt.Print(StringResultTreeEvaluator(it)) -} diff --git a/src/graph/result-tree-evaluator_test.go b/src/graph/result-tree-evaluator_test.go deleted file mode 100644 index 349bc08..0000000 --- a/src/graph/result-tree-evaluator_test.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph - -import ( - "testing" -) - -func TestSingleIterator(t *testing.T) { - all := NewInt64AllIterator(1, 3) - result := StringResultTreeEvaluator(all) - expected := "(1)\n(2)\n(3)\n" - if expected != result { - t.Errorf("Expected \"%s\" got \"%s\"", expected, result) - } -} - -func TestAndIterator(t *testing.T) { - all1 := NewInt64AllIterator(1, 3) - all2 := NewInt64AllIterator(3, 5) - and := NewAndIterator() - and.AddSubIterator(all1) - and.AddSubIterator(all2) - - result := StringResultTreeEvaluator(and) - expected := "(3 (3) (3))\n" - if expected != result { - t.Errorf("Expected \"%s\" got \"%s\"", expected, result) - } -} diff --git a/src/graph/session.go b/src/graph/session.go deleted file mode 100644 index 03a137b..0000000 --- a/src/graph/session.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines the graph session interface general to all query languages. - -type ParseResult int - -const ( - Parsed ParseResult = iota - ParseMore - ParseFail -) - -type Session interface { - // Return whether the string is a valid expression. - InputParses(string) (ParseResult, error) - ExecInput(string, chan interface{}, int) - ToText(interface{}) string - ToggleDebug() -} - -type HttpSession interface { - // Return whether the string is a valid expression. 
- InputParses(string) (ParseResult, error) - // Runs the query and returns individual results on the channel. - ExecInput(string, chan interface{}, int) - GetQuery(string, chan map[string]interface{}) - BuildJson(interface{}) - GetJson() (interface{}, error) - ClearJson() - ToggleDebug() -} diff --git a/src/graph/triple.go b/src/graph/triple.go deleted file mode 100644 index 5abdb59..0000000 --- a/src/graph/triple.go +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines the struct which makes the TripleStore possible -- the triple. -// -// At its heart, it consists of three fields -- Subject, Predicate, and Object. -// Three IDs that relate to each other. That's all there is to it. The triples -// are the links in the graph, and the existence of node IDs is defined by the -// fact that some triple in the graph mentions them. -// -// This means that a complete representation of the graph is equivalent to a -// list of triples. The rest is just indexing for speed. -// -// Adding fields to the triple is not to be taken lightly. You'll see I mention -// provenance, but don't as yet use it in any backing store. In general, there -// can be features that can be turned on or off for any store, but I haven't -// decided how to allow/disallow them yet. 
Another such example would be to add -// a forward and reverse index field -- forward being "order the list of -// objects pointed at by this subject with this predicate" such as first and -// second children, top billing, what have you. -// -// There will never be that much in this file except for the definition, but -// the consequences are not to be taken lightly. But do suggest cool features! - -import ( - "fmt" - "reflect" -) - -// Our triple struct, used throughout. -type Triple struct { - Sub string `json:"subject"` - Pred string `json:"predicate"` - Obj string `json:"object"` - Provenance string `json:"provenance,omitempty"` -} - -func NewTriple() *Triple { - return &Triple{} -} - -func MakeTriple(sub string, pred string, obj string, provenance string) *Triple { - return &Triple{sub, pred, obj, provenance} -} - -// List of the valid directions of a triple. -// TODO(barakmich): Replace all instances of "dir string" in the codebase -// with an enum of valid directions, to make this less stringly typed. -var TripleDirections = [4]string{"s", "p", "o", "c"} - -// Per-field accessor for triples -func (t *Triple) Get(dir string) string { - if dir == "s" { - return t.Sub - } else if dir == "p" { - return t.Pred - } else if dir == "prov" || dir == "c" { - return t.Provenance - } else if dir == "o" { - return t.Obj - } else { - panic(fmt.Sprintf("No Such Triple Direction, %s", dir)) - } -} - -func (t *Triple) Equals(other *Triple) bool { - return reflect.DeepEqual(t, other) -} - -// Pretty-prints a triple. -func (t *Triple) ToString() string { - return fmt.Sprintf("%s -- %s -> %s\n", t.Sub, t.Pred, t.Obj) -} - -func (t *Triple) IsValid() bool { - if t.Sub == "" { - return false - } - if t.Pred == "" { - return false - } - if t.Obj == "" { - return false - } - return true -} - -// Prints a triple in N-Triple format. -func (t *Triple) ToNTriple() string { - if t.Provenance == "" { - //TODO(barakmich): Proper escaping. 
- return fmt.Sprintf("%s %s %s .", t.Sub, t.Pred, t.Obj) - } else { - return fmt.Sprintf("%s %s %s %s .", t.Sub, t.Pred, t.Obj, t.Provenance) - } -} diff --git a/src/graph/triplestore.go b/src/graph/triplestore.go deleted file mode 100644 index 1e6d9ce..0000000 --- a/src/graph/triplestore.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// Defines the TripleStore interface. Every backing store must implement at -// least this interface. -// -// Most of these are pretty straightforward. As long as we can surface this -// interface, the rest of the stack will "just work" and we can connect to any -// triple backing store we prefer. - -import ( - "github.com/barakmich/glog" -) - -// Defines an opaque "triple store value" type. However the backend wishes to -// implement it, a TSVal is merely a token to a triple or a node that the backing -// store itself understands, and the base iterators pass around. -// -// For example, in a very traditional, graphd-style graph, these are int64s -// (guids of the primitives). In a very direct sort of graph, these could be -// pointers to structs, or merely triples, or whatever works best for the -// backing store. -type TSVal interface{} - -type TripleStore interface { - // Add a triple to the store. - AddTriple(*Triple) - - // Add a set of triples to the store, atomically if possible. 
- AddTripleSet([]*Triple) - - // Removes a triple matching the given one from the database, - // if it exists. Does nothing otherwise. - RemoveTriple(*Triple) - - // Given an opaque token, returns the triple for that token from the store. - GetTriple(TSVal) *Triple - - // Given a direction and a token, creates an iterator of links which have - // that node token in that directional field. - GetTripleIterator(string, TSVal) Iterator - - // Returns an iterator enumerating all nodes in the graph. - GetNodesAllIterator() Iterator - - // Returns an iterator enumerating all links in the graph. - GetTriplesAllIterator() Iterator - - // Given a node ID, return the opaque token used by the TripleStore - // to represent that id. - GetIdFor(string) TSVal - - // Given an opaque token, return the node that it represents. - GetNameFor(TSVal) string - - // Returns the number of triples currently stored. - Size() int64 - - // Creates a Fixed iterator which can compare TSVals - MakeFixed() *FixedIterator - - // Optimize an iterator in the context of the triple store. - // Suppose we have a better index for the passed tree; this - // gives the TripleStore the oppotunity to replace it - // with a more efficient iterator. - OptimizeIterator(it Iterator) (Iterator, bool) - - // Close the triple store and clean up. (Flush to disk, cleanly - // sever connections, etc) - Close() - - // Convienence function for speed. Given a triple token and a direction - // return the node token for that direction. Sometimes, a TripleStore - // can do this without going all the way to the backing store, and - // gives the TripleStore the opportunity to make this optimization. - // - // Iterators will call this. 
At worst, a valid implementation is - // self.GetIdFor(self.GetTriple(triple_id).Get(dir)) - GetTripleDirection(triple_id TSVal, dir string) TSVal -} - -type OptionsDict map[string]interface{} - -func (d OptionsDict) GetIntKey(key string) (int, bool) { - if val, ok := d[key]; ok { - switch vv := val.(type) { - case float64: - return int(vv), true - default: - glog.Fatalln("Invalid", key, "parameter type from config.") - } - } - return 0, false -} - -func (d OptionsDict) GetStringKey(key string) (string, bool) { - if val, ok := d[key]; ok { - switch vv := val.(type) { - case string: - return vv, true - default: - glog.Fatalln("Invalid", key, "parameter type from config.") - } - } - return "", false -} diff --git a/src/graph/value-comparison-iterator.go b/src/graph/value-comparison-iterator.go deleted file mode 100644 index ecfe86e..0000000 --- a/src/graph/value-comparison-iterator.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph - -// "Value Comparison" is a unary operator -- a filter across the values in the -// relevant subiterator. -// -// This is hugely useful for things like provenance, but value ranges in general -// come up from time to time. At *worst* we're as big as our underlying iterator. -// At best, we're the null iterator. -// -// This is ripe for backend-side optimization. 
If you can run a value iterator, -// from a sorted set -- some sort of value index, then go for it. -// -// In MQL terms, this is the [{"age>=": 21}] concept. - -import ( - "fmt" - "log" - "strconv" - "strings" -) - -type ComparisonOperator int - -const ( - kCompareLT ComparisonOperator = iota - kCompareLTE - kCompareGT - kCompareGTE - // Why no Equals? Because that's usually an AndIterator. -) - -type ValueComparisonIterator struct { - BaseIterator - subIt Iterator - op ComparisonOperator - comparisonValue interface{} - ts TripleStore -} - -func NewValueComparisonIterator( - subIt Iterator, - operator ComparisonOperator, - value interface{}, - ts TripleStore) *ValueComparisonIterator { - - var vc ValueComparisonIterator - BaseIteratorInit(&vc.BaseIterator) - vc.subIt = subIt - vc.op = operator - vc.comparisonValue = value - vc.ts = ts - return &vc -} - -// Here's the non-boilerplate part of the ValueComparison iterator. Given a value -// and our operator, determine whether or not we meet the requirement. -func (vc *ValueComparisonIterator) doComparison(val TSVal) bool { - //TODO(barakmich): Implement string comparison. 
- nodeStr := vc.ts.GetNameFor(val) - switch cVal := vc.comparisonValue.(type) { - case int: - cInt := int64(cVal) - intVal, err := strconv.ParseInt(nodeStr, 10, 64) - if err != nil { - return false - } - return RunIntOp(intVal, vc.op, cInt) - case int64: - intVal, err := strconv.ParseInt(nodeStr, 10, 64) - if err != nil { - return false - } - return RunIntOp(intVal, vc.op, cVal) - default: - return true - } -} - -func (vc *ValueComparisonIterator) Close() { - vc.subIt.Close() -} - -func RunIntOp(a int64, op ComparisonOperator, b int64) bool { - switch op { - case kCompareLT: - return a < b - case kCompareLTE: - return a <= b - case kCompareGT: - return a > b - case kCompareGTE: - return a >= b - default: - log.Fatal("Unknown operator type") - return false - } -} - -func (vc *ValueComparisonIterator) Reset() { - vc.subIt.Reset() -} - -func (vc *ValueComparisonIterator) Clone() Iterator { - out := NewValueComparisonIterator(vc.subIt.Clone(), vc.op, vc.comparisonValue, vc.ts) - out.CopyTagsFrom(vc) - return out -} - -func (vc *ValueComparisonIterator) Next() (TSVal, bool) { - var val TSVal - var ok bool - for { - val, ok = vc.subIt.Next() - if !ok { - return nil, false - } - if vc.doComparison(val) { - break - } - } - vc.Last = val - return val, ok -} - -func (vc *ValueComparisonIterator) NextResult() bool { - for { - hasNext := vc.subIt.NextResult() - if !hasNext { - return false - } - if vc.doComparison(vc.subIt.LastResult()) { - return true - } - } - vc.Last = vc.subIt.LastResult() - return true -} - -func (vc *ValueComparisonIterator) Check(val TSVal) bool { - if !vc.doComparison(val) { - return false - } - return vc.subIt.Check(val) -} - -// If we failed the check, then the subiterator should not contribute to the result -// set. Otherwise, go ahead and tag it. -func (vc *ValueComparisonIterator) TagResults(out *map[string]TSVal) { - vc.BaseIterator.TagResults(out) - vc.subIt.TagResults(out) -} - -// Registers the value-comparison iterator. 
-func (vc *ValueComparisonIterator) Type() string { return "value-comparison" } - -// Prints the value-comparison and its subiterator. -func (vc *ValueComparisonIterator) DebugString(indent int) string { - return fmt.Sprintf("%s(%s\n%s)", - strings.Repeat(" ", indent), - vc.Type(), vc.subIt.DebugString(indent+4)) -} - -// There's nothing to optimize, locally, for a value-comparison iterator. -// Replace the underlying iterator if need be. -// potentially replace it. -func (vc *ValueComparisonIterator) Optimize() (Iterator, bool) { - newSub, changed := vc.subIt.Optimize() - if changed { - vc.subIt.Close() - vc.subIt = newSub - } - return vc, false -} - -// We're only as expensive as our subiterator. -// Again, optimized value comparison iterators should do better. -func (vc *ValueComparisonIterator) GetStats() *IteratorStats { - return vc.subIt.GetStats() -} diff --git a/src/graph/value-comparison-iterator_test.go b/src/graph/value-comparison-iterator_test.go deleted file mode 100644 index 23c795d..0000000 --- a/src/graph/value-comparison-iterator_test.go +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph - -import ( - "testing" -) - -func SetupMockTripleStore(nameMap map[string]int) *TestTripleStore { - ts := new(TestTripleStore) - for k, v := range nameMap { - ts.On("GetIdFor", k).Return(v) - ts.On("GetNameFor", v).Return(k) - } - return ts -} - -func SimpleValueTripleStore() *TestTripleStore { - ts := SetupMockTripleStore(map[string]int{ - "0": 0, - "1": 1, - "2": 2, - "3": 3, - "4": 4, - "5": 5, - }) - return ts -} - -func BuildFixedIterator() *FixedIterator { - fixed := newFixedIterator() - fixed.AddValue(0) - fixed.AddValue(1) - fixed.AddValue(2) - fixed.AddValue(3) - fixed.AddValue(4) - return fixed -} - -func checkIteratorContains(ts TripleStore, it Iterator, expected []string, t *testing.T) { - var actual []string - actual = nil - for { - val, ok := it.Next() - if !ok { - break - } - actual = append(actual, ts.GetNameFor(val)) - } - actualSet := actual[:] - for _, a := range expected { - found := false - for j, b := range actualSet { - if a == b { - actualSet = append(actualSet[:j], actualSet[j+1:]...) 
- found = true - break - } - } - if !found { - t.Error("Couldn't find", a, "in actual output.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) - return - } - } - if len(actualSet) != 0 { - t.Error("Actual output has more than expected.\nActual:", actual, "\nExpected: ", expected, "\nRemainder: ", actualSet) - } -} - -func TestWorkingIntValueComparison(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareLT, int64(3), ts) - checkIteratorContains(ts, vc, []string{"0", "1", "2"}, t) -} - -func TestFailingIntValueComparison(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareLT, int64(0), ts) - checkIteratorContains(ts, vc, []string{}, t) -} - -func TestWorkingGT(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGT, int64(2), ts) - checkIteratorContains(ts, vc, []string{"3", "4"}, t) -} - -func TestWorkingGTE(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) - checkIteratorContains(ts, vc, []string{"2", "3", "4"}, t) -} - -func TestVCICheck(t *testing.T) { - ts := SimpleValueTripleStore() - fixed := BuildFixedIterator() - vc := NewValueComparisonIterator(fixed, kCompareGTE, int64(2), ts) - if vc.Check(1) { - t.Error("1 is less than 2, should be GTE") - } - if !vc.Check(2) { - t.Error("2 is GTE 2") - } - if !vc.Check(3) { - t.Error("3 is GTE 2") - } - if vc.Check(5) { - t.Error("5 is not in the underlying iterator") - } -} diff --git a/src/graph_leveldb/leveldb-all-iterator.go b/src/graph_leveldb/leveldb-all-iterator.go deleted file mode 100644 index 78f9bdc..0000000 --- a/src/graph_leveldb/leveldb-all-iterator.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph_leveldb - -import ( - "bytes" - "fmt" - leveldb_it "github.com/syndtr/goleveldb/leveldb/iterator" - leveldb_opt "github.com/syndtr/goleveldb/leveldb/opt" - "graph" - "strings" -) - -type LevelDBAllIterator struct { - graph.BaseIterator - prefix []byte - dir string - open bool - it leveldb_it.Iterator - ts *LevelDBTripleStore - ro *leveldb_opt.ReadOptions -} - -func NewLevelDBAllIterator(prefix, dir string, ts *LevelDBTripleStore) *LevelDBAllIterator { - var it LevelDBAllIterator - graph.BaseIteratorInit(&it.BaseIterator) - it.ro = &leveldb_opt.ReadOptions{} - it.ro.DontFillCache = true - it.it = ts.db.NewIterator(nil, it.ro) - it.prefix = []byte(prefix) - it.dir = dir - it.open = true - it.ts = ts - it.it.Seek(it.prefix) - if !it.it.Valid() { - it.open = false - it.it.Release() - } - return &it -} - -func (a *LevelDBAllIterator) Reset() { - if !a.open { - a.it = a.ts.db.NewIterator(nil, a.ro) - a.open = true - } - a.it.Seek(a.prefix) - if !a.it.Valid() { - a.open = false - a.it.Release() - } -} - -func (a *LevelDBAllIterator) Clone() graph.Iterator { - out := NewLevelDBAllIterator(string(a.prefix), a.dir, a.ts) - out.CopyTagsFrom(a) - return out -} - -func (a *LevelDBAllIterator) Next() (graph.TSVal, bool) { - if !a.open { - a.Last = nil - return nil, false - } - var out []byte - out = make([]byte, len(a.it.Key())) - copy(out, a.it.Key()) - a.it.Next() - if !a.it.Valid() { - a.Close() - } - if 
!bytes.HasPrefix(out, a.prefix) { - a.Close() - return nil, false - } - a.Last = out - return out, true -} - -func (a *LevelDBAllIterator) Check(v graph.TSVal) bool { - a.Last = v - return true -} - -func (lit *LevelDBAllIterator) Close() { - if lit.open { - lit.it.Release() - lit.open = false - } -} - -func (a *LevelDBAllIterator) Size() (int64, bool) { - size, err := a.ts.GetApproximateSizeForPrefix(a.prefix) - if err == nil { - return size, false - } - // INT64_MAX - return int64(^uint64(0) >> 1), false -} - -func (lit *LevelDBAllIterator) DebugString(indent int) string { - size, _ := lit.Size() - return fmt.Sprintf("%s(%s tags: %v leveldb size:%d %s %p)", strings.Repeat(" ", indent), lit.Type(), lit.Tags(), size, lit.dir, lit) -} - -func (lit *LevelDBAllIterator) Type() string { return "all" } -func (lit *LevelDBAllIterator) Sorted() bool { return false } - -func (lit *LevelDBAllIterator) Optimize() (graph.Iterator, bool) { - return lit, false -} - -func (lit *LevelDBAllIterator) GetStats() *graph.IteratorStats { - s, _ := lit.Size() - return &graph.IteratorStats{ - CheckCost: 1, - NextCost: 2, - Size: s, - } -} diff --git a/src/graph_leveldb/leveldb-iterator.go b/src/graph_leveldb/leveldb-iterator.go deleted file mode 100644 index 0e611f1..0000000 --- a/src/graph_leveldb/leveldb-iterator.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_leveldb - -import ( - "bytes" - _ "encoding/binary" - "fmt" - leveldb_it "github.com/syndtr/goleveldb/leveldb/iterator" - leveldb_opt "github.com/syndtr/goleveldb/leveldb/opt" - "graph" - "strings" -) - -type LevelDBIterator struct { - graph.BaseIterator - nextPrefix []byte - checkId []byte - dir string - open bool - it leveldb_it.Iterator - ts *LevelDBTripleStore - ro *leveldb_opt.ReadOptions - originalPrefix string -} - -func NewLevelDBIterator(prefix, dir string, value graph.TSVal, ts *LevelDBTripleStore) *LevelDBIterator { - var it LevelDBIterator - graph.BaseIteratorInit(&it.BaseIterator) - it.checkId = value.([]byte) - it.dir = dir - it.originalPrefix = prefix - it.nextPrefix = make([]byte, 0, 2+ts.hasher.Size()) - it.nextPrefix = append(it.nextPrefix, []byte(prefix)...) - it.nextPrefix = append(it.nextPrefix, []byte(it.checkId[1:])...) - it.ro = &leveldb_opt.ReadOptions{} - it.ro.DontFillCache = true - it.it = ts.db.NewIterator(nil, it.ro) - it.open = true - it.ts = ts - ok := it.it.Seek(it.nextPrefix) - if !ok { - it.open = false - it.it.Release() - } - return &it -} - -func (lit *LevelDBIterator) Reset() { - if !lit.open { - lit.it = lit.ts.db.NewIterator(nil, lit.ro) - lit.open = true - } - ok := lit.it.Seek(lit.nextPrefix) - if !ok { - lit.open = false - lit.it.Release() - } -} - -func (lit *LevelDBIterator) Clone() graph.Iterator { - out := NewLevelDBIterator(lit.originalPrefix, lit.dir, lit.checkId, lit.ts) - out.CopyTagsFrom(lit) - return out -} - -func (lit *LevelDBIterator) Close() { - if lit.open { - lit.it.Release() - lit.open = false - } -} - -func (lit *LevelDBIterator) Next() (graph.TSVal, bool) { - if lit.it == nil { - lit.Last = nil - return nil, false - } - if !lit.open { - lit.Last = nil - return nil, false - } - if !lit.it.Valid() { - lit.Last = nil - lit.Close() - return nil, false - } - if bytes.HasPrefix(lit.it.Key(), lit.nextPrefix) { - out := make([]byte, len(lit.it.Key())) - copy(out, lit.it.Key()) - lit.Last = out - 
ok := lit.it.Next() - if !ok { - lit.Close() - } - return out, true - } - lit.Close() - lit.Last = nil - return nil, false -} - -func GetPositionFromPrefix(prefix []byte, dir string, ts *LevelDBTripleStore) int { - if bytes.Equal(prefix, []byte("sp")) { - switch dir { - case "s": - return 2 - case "p": - return ts.hasher.Size() + 2 - case "o": - return 2*ts.hasher.Size() + 2 - case "c": - return -1 - } - } - if bytes.Equal(prefix, []byte("po")) { - switch dir { - case "s": - return 2*ts.hasher.Size() + 2 - case "p": - return 2 - case "o": - return ts.hasher.Size() + 2 - case "c": - return -1 - } - } - if bytes.Equal(prefix, []byte("os")) { - switch dir { - case "s": - return ts.hasher.Size() + 2 - case "p": - return 2*ts.hasher.Size() + 2 - case "o": - return 2 - case "c": - return -1 - } - } - if bytes.Equal(prefix, []byte("cp")) { - switch dir { - case "s": - return 2*ts.hasher.Size() + 2 - case "p": - return ts.hasher.Size() + 2 - case "o": - return 3*ts.hasher.Size() + 2 - case "c": - return 2 - } - } - panic("Notreached") -} - -func (lit *LevelDBIterator) Check(v graph.TSVal) bool { - val := v.([]byte) - if val[0] == 'z' { - return false - } - offset := GetPositionFromPrefix(val[0:2], lit.dir, lit.ts) - if offset != -1 { - if bytes.HasPrefix(val[offset:], lit.checkId[1:]) { - return true - } - } else { - nameForDir := lit.ts.GetTriple(v).Get(lit.dir) - hashForDir := lit.ts.GetIdFor(nameForDir).([]byte) - if bytes.Equal(hashForDir, lit.checkId) { - return true - } - } - return false -} - -func (lit *LevelDBIterator) Size() (int64, bool) { - return lit.ts.GetSizeFor(lit.checkId), true -} - -func (lit *LevelDBIterator) DebugString(indent int) string { - size, _ := lit.Size() - return fmt.Sprintf("%s(%s %d tags: %v dir: %s size:%d %s)", strings.Repeat(" ", indent), lit.Type(), lit.GetUid(), lit.Tags(), lit.dir, size, lit.ts.GetNameFor(lit.checkId)) -} - -func (lit *LevelDBIterator) Type() string { return "leveldb" } -func (lit *LevelDBIterator) Sorted() bool { 
return false } - -func (lit *LevelDBIterator) Optimize() (graph.Iterator, bool) { - return lit, false -} - -func (lit *LevelDBIterator) GetStats() *graph.IteratorStats { - s, _ := lit.Size() - return &graph.IteratorStats{ - CheckCost: 1, - NextCost: 2, - Size: s, - } -} diff --git a/src/graph_leveldb/leveldb-triplestore-iterator-optimize.go b/src/graph_leveldb/leveldb-triplestore-iterator-optimize.go deleted file mode 100644 index 99ae611..0000000 --- a/src/graph_leveldb/leveldb-triplestore-iterator-optimize.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_leveldb - -import ( - "graph" -) - -func (ts *LevelDBTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { - switch it.Type() { - case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) - - } - return it, false -} - -func (ts *LevelDBTripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { - l := it.GetSubIterators() - if l.Len() != 1 { - return it, false - } - primaryIt := l.Front().Value.(graph.Iterator) - if primaryIt.Type() == "fixed" { - size, _ := primaryIt.Size() - if size == 1 { - val, ok := primaryIt.Next() - if !ok { - panic("Sizes lie") - } - newIt := ts.GetTripleIterator(it.Direction(), val) - newIt.CopyTagsFrom(it) - for _, tag := range primaryIt.Tags() { - newIt.AddFixedTag(tag, val) - } - it.Close() - return newIt, true - } - } - return it, false -} diff --git a/src/graph_leveldb/leveldb-triplestore.go b/src/graph_leveldb/leveldb-triplestore.go deleted file mode 100644 index e459bfd..0000000 --- a/src/graph_leveldb/leveldb-triplestore.go +++ /dev/null @@ -1,427 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_leveldb - -import ( - "bytes" - "crypto/sha1" - "encoding/binary" - "encoding/json" - "fmt" - "github.com/barakmich/glog" - "github.com/syndtr/goleveldb/leveldb" - leveldb_cache "github.com/syndtr/goleveldb/leveldb/cache" - leveldb_opt "github.com/syndtr/goleveldb/leveldb/opt" - leveldb_util "github.com/syndtr/goleveldb/leveldb/util" - "graph" - "hash" -) - -const DefaultCacheSize = 2 -const DefaultWriteBufferSize = 20 - -type LevelDBTripleStore struct { - dbOpts *leveldb_opt.Options - db *leveldb.DB - path string - open bool - size int64 - hasher hash.Hash - writeopts *leveldb_opt.WriteOptions - readopts *leveldb_opt.ReadOptions -} - -func CreateNewLevelDB(path string) bool { - opts := &leveldb_opt.Options{} - db, err := leveldb.OpenFile(path, opts) - if err != nil { - glog.Errorln("Error: couldn't create database", err) - return false - } - defer db.Close() - ts := &LevelDBTripleStore{} - ts.db = db - ts.writeopts = &leveldb_opt.WriteOptions{ - Sync: true, - } - ts.Close() - return true -} - -func NewDefaultLevelDBTripleStore(path string, options graph.OptionsDict) *LevelDBTripleStore { - var ts LevelDBTripleStore - ts.path = path - cache_size := DefaultCacheSize - if val, ok := options.GetIntKey("cache_size_mb"); ok { - cache_size = val - } - ts.dbOpts = &leveldb_opt.Options{ - BlockCache: leveldb_cache.NewLRUCache(cache_size * leveldb_opt.MiB), - } - ts.dbOpts.ErrorIfMissing = true - - write_buffer_mb := DefaultWriteBufferSize - if val, ok := options.GetIntKey("write_buffer_mb"); ok { - write_buffer_mb = val - } - ts.dbOpts.WriteBuffer = write_buffer_mb * leveldb_opt.MiB - ts.hasher = sha1.New() - ts.writeopts = &leveldb_opt.WriteOptions{ - Sync: false, - } - ts.readopts = &leveldb_opt.ReadOptions{} - db, err := leveldb.OpenFile(ts.path, ts.dbOpts) - if err != nil { - panic("Error, couldn't open! 
" + err.Error()) - } - ts.db = db - glog.Infoln(ts.GetStats()) - ts.getSize() - return &ts -} - -func (ts *LevelDBTripleStore) GetStats() string { - out := "" - stats, err := ts.db.GetProperty("leveldb.stats") - if err == nil { - out += fmt.Sprintln("Stats: ", stats) - } - out += fmt.Sprintln("Size: ", ts.size) - return out -} - -func (ts *LevelDBTripleStore) Size() int64 { - return ts.size -} - -func (ts *LevelDBTripleStore) createKeyFor(dir1, dir2, dir3 string, triple *graph.Triple) []byte { - key := make([]byte, 0, 2+(ts.hasher.Size()*3)) - key = append(key, []byte(dir1+dir2)...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir1))...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir2))...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir3))...) - return key -} - -func (ts *LevelDBTripleStore) createProvKeyFor(dir1, dir2, dir3 string, triple *graph.Triple) []byte { - key := make([]byte, 0, 2+(ts.hasher.Size()*4)) - key = append(key, []byte("c"+dir1)...) - key = append(key, ts.convertStringToByteHash(triple.Get("c"))...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir1))...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir2))...) - key = append(key, ts.convertStringToByteHash(triple.Get(dir3))...) - return key -} - -func (ts *LevelDBTripleStore) createValueKeyFor(s string) []byte { - key := make([]byte, 0, 1+ts.hasher.Size()) - key = append(key, []byte("z")...) - key = append(key, ts.convertStringToByteHash(s)...) 
- return key -} - -func (ts *LevelDBTripleStore) AddTriple(t *graph.Triple) { - batch := &leveldb.Batch{} - ts.buildWrite(batch, t) - err := ts.db.Write(batch, ts.writeopts) - if err != nil { - glog.Errorf("Couldn't write to DB for triple %s", t.ToString()) - return - } - ts.size++ -} - -func (ts *LevelDBTripleStore) RemoveTriple(t *graph.Triple) { - _, err := ts.db.Get(ts.createKeyFor("s", "p", "o", t), ts.readopts) - if err != nil && err != leveldb.ErrNotFound { - glog.Errorf("Couldn't access DB to confirm deletion") - return - } - if err == leveldb.ErrNotFound { - // No such triple in the database, forget about it. - return - } - batch := &leveldb.Batch{} - batch.Delete(ts.createKeyFor("s", "p", "o", t)) - batch.Delete(ts.createKeyFor("o", "s", "p", t)) - batch.Delete(ts.createKeyFor("p", "o", "s", t)) - ts.UpdateValueKeyBy(t.Get("s"), -1, batch) - ts.UpdateValueKeyBy(t.Get("p"), -1, batch) - ts.UpdateValueKeyBy(t.Get("o"), -1, batch) - if t.Get("c") != "" { - batch.Delete(ts.createProvKeyFor("p", "s", "o", t)) - ts.UpdateValueKeyBy(t.Get("c"), -1, batch) - } - err = ts.db.Write(batch, nil) - if err != nil { - glog.Errorf("Couldn't delete triple %s", t.ToString()) - return - } - ts.size-- -} - -func (ts *LevelDBTripleStore) buildTripleWrite(batch *leveldb.Batch, t *graph.Triple) { - bytes, err := json.Marshal(*t) - if err != nil { - glog.Errorf("Couldn't write to buffer for triple %s\n %s\n", t.ToString(), err) - return - } - batch.Put(ts.createKeyFor("s", "p", "o", t), bytes) - batch.Put(ts.createKeyFor("o", "s", "p", t), bytes) - batch.Put(ts.createKeyFor("p", "o", "s", t), bytes) - if t.Get("c") != "" { - batch.Put(ts.createProvKeyFor("p", "s", "o", t), bytes) - } -} - -func (ts *LevelDBTripleStore) buildWrite(batch *leveldb.Batch, t *graph.Triple) { - ts.buildTripleWrite(batch, t) - ts.UpdateValueKeyBy(t.Get("s"), 1, nil) - ts.UpdateValueKeyBy(t.Get("p"), 1, nil) - ts.UpdateValueKeyBy(t.Get("o"), 1, nil) - if t.Get("c") != "" { - 
ts.UpdateValueKeyBy(t.Get("c"), 1, nil) - } -} - -type ValueData struct { - Name string - Size int64 -} - -func (ts *LevelDBTripleStore) UpdateValueKeyBy(name string, amount int, batch *leveldb.Batch) { - value := &ValueData{name, int64(amount)} - key := ts.createValueKeyFor(name) - b, err := ts.db.Get(key, ts.readopts) - - // Error getting the node from the database. - if err != nil && err != leveldb.ErrNotFound { - glog.Errorf("Error reading Value %s from the DB\n", name) - return - } - - // Node exists in the database -- unmarshal and update. - if b != nil && err != leveldb.ErrNotFound { - err = json.Unmarshal(b, value) - if err != nil { - glog.Errorln("Error: couldn't reconstruct value ", err) - return - } - value.Size += int64(amount) - } - - // Are we deleting something? - if amount < 0 { - if value.Size <= 0 { - if batch == nil { - ts.db.Delete(key, ts.writeopts) - } else { - batch.Delete(key) - } - return - } - } - - // Repackage and rewrite. - bytes, err := json.Marshal(&value) - if err != nil { - glog.Errorf("Couldn't write to buffer for value %s\n %s", name, err) - return - } - if batch == nil { - ts.db.Put(key, bytes, ts.writeopts) - } else { - batch.Put(key, bytes) - } -} - -func (ts *LevelDBTripleStore) AddTripleSet(t_s []*graph.Triple) { - batch := &leveldb.Batch{} - newTs := len(t_s) - resizeMap := make(map[string]int) - for _, t := range t_s { - ts.buildTripleWrite(batch, t) - resizeMap[t.Sub]++ - resizeMap[t.Pred]++ - resizeMap[t.Obj]++ - if t.Provenance != "" { - resizeMap[t.Provenance]++ - } - } - for k, v := range resizeMap { - ts.UpdateValueKeyBy(k, v, batch) - } - err := ts.db.Write(batch, ts.writeopts) - if err != nil { - glog.Errorf("Couldn't write to DB for tripleset") - return - } - ts.size += int64(newTs) -} - -func (ldbts *LevelDBTripleStore) Close() { - buf := new(bytes.Buffer) - err := binary.Write(buf, binary.LittleEndian, ldbts.size) - if err == nil { - werr := ldbts.db.Put([]byte("__size"), buf.Bytes(), ldbts.writeopts) - if werr 
!= nil { - glog.Errorf("Couldn't write size before closing!") - } - } else { - glog.Errorf("Couldn't convert size before closing!") - } - ldbts.db.Close() - ldbts.open = false -} - -func (ts *LevelDBTripleStore) GetTriple(k graph.TSVal) *graph.Triple { - var triple graph.Triple - b, err := ts.db.Get(k.([]byte), ts.readopts) - if err != nil && err != leveldb.ErrNotFound { - glog.Errorln("Error: couldn't get triple from DB") - return &graph.Triple{} - } - if err == leveldb.ErrNotFound { - // No harm, no foul. - return &graph.Triple{} - } - err = json.Unmarshal(b, &triple) - if err != nil { - glog.Errorln("Error: couldn't reconstruct triple") - return &graph.Triple{} - } - return &triple -} - -func (ts *LevelDBTripleStore) convertStringToByteHash(s string) []byte { - ts.hasher.Reset() - key := make([]byte, 0, ts.hasher.Size()) - ts.hasher.Write([]byte(s)) - key = ts.hasher.Sum(key) - return key -} - -func (ts *LevelDBTripleStore) GetIdFor(s string) graph.TSVal { - return ts.createValueKeyFor(s) -} - -func (ts *LevelDBTripleStore) getValueData(value_key []byte) ValueData { - var out ValueData - if glog.V(3) { - glog.V(3).Infof("%s %v\n", string(value_key[0]), value_key) - } - b, err := ts.db.Get(value_key, ts.readopts) - if err != nil && err != leveldb.ErrNotFound { - glog.Errorln("Error: couldn't get value from DB") - return out - } - if b != nil && err != leveldb.ErrNotFound { - err = json.Unmarshal(b, &out) - if err != nil { - glog.Errorln("Error: couldn't reconstruct value") - return ValueData{} - } - } - return out -} - -func (ts *LevelDBTripleStore) GetNameFor(k graph.TSVal) string { - if k == nil { - glog.V(2).Infoln("k was nil") - return "" - } - return ts.getValueData(k.([]byte)).Name -} - -func (ts *LevelDBTripleStore) GetSizeFor(k graph.TSVal) int64 { - if k == nil { - return 0 - } - return int64(ts.getValueData(k.([]byte)).Size) -} - -func (ts *LevelDBTripleStore) getSize() { - var size int64 - b, err := ts.db.Get([]byte("__size"), ts.readopts) - if err != 
nil && err != leveldb.ErrNotFound { - panic("Couldn't read size " + err.Error()) - } - if err == leveldb.ErrNotFound { - // Must be a new database. Cool - ts.size = 0 - return - } - buf := bytes.NewBuffer(b) - err = binary.Read(buf, binary.LittleEndian, &size) - if err != nil { - glog.Errorln("Error: couldn't parse size") - } - ts.size = size -} - -func (ts *LevelDBTripleStore) GetApproximateSizeForPrefix(pre []byte) (int64, error) { - limit := make([]byte, len(pre)) - copy(limit, pre) - end := len(limit) - 1 - limit[end]++ - ranges := make([]leveldb_util.Range, 1) - ranges[0].Start = pre - ranges[0].Limit = limit - sizes, err := ts.db.GetApproximateSizes(ranges) - if err == nil { - return (int64(sizes[0]) >> 6) + 1, nil - } - return 0, nil -} - -func (ts *LevelDBTripleStore) GetTripleIterator(dir string, val graph.TSVal) graph.Iterator { - switch dir { - case "s": - return NewLevelDBIterator("sp", "s", val, ts) - case "p": - return NewLevelDBIterator("po", "p", val, ts) - case "o": - return NewLevelDBIterator("os", "o", val, ts) - case "c": - return NewLevelDBIterator("cp", "c", val, ts) - } - panic("Notreached " + dir) -} - -func (ts *LevelDBTripleStore) GetNodesAllIterator() graph.Iterator { - return NewLevelDBAllIterator("z", "v", ts) -} - -func (ts *LevelDBTripleStore) GetTriplesAllIterator() graph.Iterator { - return NewLevelDBAllIterator("po", "p", ts) -} - -func (ts *LevelDBTripleStore) GetTripleDirection(val graph.TSVal, direction string) graph.TSVal { - v := val.([]uint8) - offset := GetPositionFromPrefix(v[0:2], direction, ts) - if offset != -1 { - return append([]byte("z"), v[offset:offset+ts.hasher.Size()]...) 
- } else { - return ts.GetTriple(val).Get(direction) - } -} - -func compareBytes(a, b graph.TSVal) bool { - return bytes.Equal(a.([]uint8), b.([]uint8)) -} - -func (ts *LevelDBTripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(compareBytes) -} diff --git a/src/graph_leveldb/leveldb_test.go b/src/graph_leveldb/leveldb_test.go deleted file mode 100644 index 4b17827..0000000 --- a/src/graph_leveldb/leveldb_test.go +++ /dev/null @@ -1,433 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph_leveldb - -import ( - . 
"github.com/smartystreets/goconvey/convey" - "graph" - "io/ioutil" - "os" - "sort" - "testing" -) - -func makeTripleSet() []*graph.Triple { - tripleSet := []*graph.Triple{ - graph.MakeTriple("A", "follows", "B", ""), - graph.MakeTriple("C", "follows", "B", ""), - graph.MakeTriple("C", "follows", "D", ""), - graph.MakeTriple("D", "follows", "B", ""), - graph.MakeTriple("B", "follows", "F", ""), - graph.MakeTriple("F", "follows", "G", ""), - graph.MakeTriple("D", "follows", "G", ""), - graph.MakeTriple("E", "follows", "F", ""), - graph.MakeTriple("B", "status", "cool", "status_graph"), - graph.MakeTriple("D", "status", "cool", "status_graph"), - graph.MakeTriple("G", "status", "cool", "status_graph"), - } - return tripleSet -} - -func extractTripleFromIterator(ts graph.TripleStore, it graph.Iterator) []string { - var output []string - for { - val, ok := it.Next() - if !ok { - break - } - output = append(output, ts.GetTriple(val).ToString()) - } - return output -} - -func extractValuesFromIterator(ts graph.TripleStore, it graph.Iterator) []string { - var output []string - for { - val, ok := it.Next() - if !ok { - break - } - output = append(output, ts.GetNameFor(val)) - } - return output -} - -func TestCreateDatabase(t *testing.T) { - - Convey("Given a database path", t, func() { - tmpDir, err := ioutil.TempDir(os.TempDir(), "cayley_test") - t.Log(tmpDir) - if err != nil { - t.Fatal("Cannot use ioutil.", err) - } - - Convey("Creates a database", func() { - ok := CreateNewLevelDB(tmpDir) - So(ok, ShouldBeTrue) - Convey("And has good defaults for a new database", func() { - ts := NewDefaultLevelDBTripleStore(tmpDir, nil) - So(ts, ShouldNotBeNil) - So(ts.Size(), ShouldEqual, 0) - ts.Close() - }) - }) - - Convey("Fails if it cannot create the database", func() { - ok := CreateNewLevelDB("/dev/null/some terrible path") - So(ok, ShouldBeFalse) - So(func() { NewDefaultLevelDBTripleStore("/dev/null/some terrible path", nil) }, ShouldPanic) - }) - - Reset(func() { - 
os.RemoveAll(tmpDir) - }) - - }) - -} - -func TestLoadDatabase(t *testing.T) { - var ts *LevelDBTripleStore - - Convey("Given a created database path", t, func() { - tmpDir, _ := ioutil.TempDir(os.TempDir(), "cayley_test") - t.Log(tmpDir) - ok := CreateNewLevelDB(tmpDir) - So(ok, ShouldBeTrue) - ts = NewDefaultLevelDBTripleStore(tmpDir, nil) - - Convey("Can load a single triple", func() { - ts.AddTriple(graph.MakeTriple("Something", "points_to", "Something Else", "context")) - So(ts.GetNameFor(ts.GetIdFor("Something")), ShouldEqual, "Something") - So(ts.Size(), ShouldEqual, 1) - }) - - Convey("Can load many triples", func() { - - ts.AddTripleSet(makeTripleSet()) - So(ts.Size(), ShouldEqual, 11) - So(ts.GetSizeFor(ts.GetIdFor("B")), ShouldEqual, 5) - - Convey("Can delete triples", func() { - ts.RemoveTriple(graph.MakeTriple("A", "follows", "B", "")) - So(ts.Size(), ShouldEqual, 10) - So(ts.GetSizeFor(ts.GetIdFor("B")), ShouldEqual, 4) - }) - }) - - Reset(func() { - ts.Close() - os.RemoveAll(tmpDir) - }) - - }) - -} - -func TestAllIterator(t *testing.T) { - var ts *LevelDBTripleStore - - Convey("Given a prepared database", t, func() { - tmpDir, _ := ioutil.TempDir(os.TempDir(), "cayley_test") - t.Log(tmpDir) - defer os.RemoveAll(tmpDir) - ok := CreateNewLevelDB(tmpDir) - So(ok, ShouldBeTrue) - ts = NewDefaultLevelDBTripleStore(tmpDir, nil) - ts.AddTripleSet(makeTripleSet()) - var it graph.Iterator - - Convey("Can create an all iterator for nodes", func() { - it = ts.GetNodesAllIterator() - So(it, ShouldNotBeNil) - - Convey("Has basics", func() { - size, accurate := it.Size() - So(size, ShouldBeBetween, 0, 20) - So(accurate, ShouldBeFalse) - So(it.Type(), ShouldEqual, "all") - re_it, ok := it.Optimize() - So(ok, ShouldBeFalse) - So(re_it, ShouldPointTo, it) - }) - - Convey("Iterates all nodes", func() { - expected := []string{ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "follows", - "status", - "cool", - "status_graph", - } - sort.Strings(expected) - actual := 
extractValuesFromIterator(ts, it) - sort.Strings(actual) - So(actual, ShouldResemble, expected) - it.Reset() - actual = extractValuesFromIterator(ts, it) - sort.Strings(actual) - So(actual, ShouldResemble, expected) - - }) - - Convey("Contains a couple nodes", func() { - So(it.Check(ts.GetIdFor("A")), ShouldBeTrue) - So(it.Check(ts.GetIdFor("cool")), ShouldBeTrue) - //So(it.Check(ts.GetIdFor("baller")), ShouldBeFalse) - }) - - Reset(func() { - it.Reset() - }) - }) - - Convey("Can create an all iterator for edges", func() { - it := ts.GetTriplesAllIterator() - So(it, ShouldNotBeNil) - Convey("Has basics", func() { - size, accurate := it.Size() - So(size, ShouldBeBetween, 0, 20) - So(accurate, ShouldBeFalse) - So(it.Type(), ShouldEqual, "all") - re_it, ok := it.Optimize() - So(ok, ShouldBeFalse) - So(re_it, ShouldPointTo, it) - }) - - Convey("Iterates an edge", func() { - edge_val, _ := it.Next() - triple := ts.GetTriple(edge_val) - set := makeTripleSet() - var string_set []string - for _, t := range set { - string_set = append(string_set, t.ToString()) - } - So(triple.ToString(), ShouldBeIn, string_set) - }) - - Reset(func() { - ts.Close() - }) - }) - }) - -} - -func TestSetIterator(t *testing.T) { - var ts *LevelDBTripleStore - var tmpDir string - - Convey("Given a prepared database", t, func() { - tmpDir, _ = ioutil.TempDir(os.TempDir(), "cayley_test") - t.Log(tmpDir) - defer os.RemoveAll(tmpDir) - ok := CreateNewLevelDB(tmpDir) - So(ok, ShouldBeTrue) - ts = NewDefaultLevelDBTripleStore(tmpDir, nil) - ts.AddTripleSet(makeTripleSet()) - var it graph.Iterator - - Convey("Can create a subject iterator", func() { - it = ts.GetTripleIterator("s", ts.GetIdFor("C")) - - Convey("Containing the right things", func() { - expected := []string{ - graph.MakeTriple("C", "follows", "B", "").ToString(), - graph.MakeTriple("C", "follows", "D", "").ToString(), - } - actual := extractTripleFromIterator(ts, it) - sort.Strings(actual) - sort.Strings(expected) - So(actual, 
ShouldResemble, expected) - }) - - Convey("And checkable", func() { - and := graph.NewAndIterator() - and.AddSubIterator(ts.GetTriplesAllIterator()) - and.AddSubIterator(it) - - expected := []string{ - graph.MakeTriple("C", "follows", "B", "").ToString(), - graph.MakeTriple("C", "follows", "D", "").ToString(), - } - actual := extractTripleFromIterator(ts, and) - sort.Strings(actual) - sort.Strings(expected) - So(actual, ShouldResemble, expected) - }) - Reset(func() { - it.Reset() - }) - - }) - - Convey("Can create an object iterator", func() { - it = ts.GetTripleIterator("o", ts.GetIdFor("F")) - - Convey("Containing the right things", func() { - expected := []string{ - graph.MakeTriple("B", "follows", "F", "").ToString(), - graph.MakeTriple("E", "follows", "F", "").ToString(), - } - actual := extractTripleFromIterator(ts, it) - sort.Strings(actual) - sort.Strings(expected) - So(actual, ShouldResemble, expected) - }) - - Convey("Mutually and-checkable", func() { - and := graph.NewAndIterator() - and.AddSubIterator(ts.GetTripleIterator("s", ts.GetIdFor("B"))) - and.AddSubIterator(it) - - expected := []string{ - graph.MakeTriple("B", "follows", "F", "").ToString(), - } - actual := extractTripleFromIterator(ts, and) - sort.Strings(actual) - sort.Strings(expected) - So(actual, ShouldResemble, expected) - }) - - }) - - Convey("Can create a predicate iterator", func() { - it = ts.GetTripleIterator("p", ts.GetIdFor("status")) - - Convey("Containing the right things", func() { - expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("D", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("G", "status", "cool", "status_graph").ToString(), - } - actual := extractTripleFromIterator(ts, it) - sort.Strings(actual) - sort.Strings(expected) - So(actual, ShouldResemble, expected) - }) - - }) - - Convey("Can create a provenance iterator", func() { - it = ts.GetTripleIterator("c", ts.GetIdFor("status_graph")) - - 
Convey("Containing the right things", func() { - expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("D", "status", "cool", "status_graph").ToString(), - graph.MakeTriple("G", "status", "cool", "status_graph").ToString(), - } - actual := extractTripleFromIterator(ts, it) - sort.Strings(actual) - sort.Strings(expected) - So(actual, ShouldResemble, expected) - }) - - Convey("Can be cross-checked", func() { - and := graph.NewAndIterator() - // Order is important - and.AddSubIterator(ts.GetTripleIterator("s", ts.GetIdFor("B"))) - and.AddSubIterator(it) - - expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - } - actual := extractTripleFromIterator(ts, and) - So(actual, ShouldResemble, expected) - }) - - Convey("Can check against other iterators", func() { - and := graph.NewAndIterator() - // Order is important - and.AddSubIterator(it) - and.AddSubIterator(ts.GetTripleIterator("s", ts.GetIdFor("B"))) - - expected := []string{ - graph.MakeTriple("B", "status", "cool", "status_graph").ToString(), - } - actual := extractTripleFromIterator(ts, and) - So(actual, ShouldResemble, expected) - }) - Reset(func() { - it.Reset() - }) - - }) - - Reset(func() { - ts.Close() - }) - - }) - -} - -func TestOptimize(t *testing.T) { - var ts *LevelDBTripleStore - var lto graph.Iterator - var tmpDir string - - Convey("Given a prepared database", t, func() { - tmpDir, _ = ioutil.TempDir(os.TempDir(), "cayley_test") - t.Log(tmpDir) - defer os.RemoveAll(tmpDir) - ok := CreateNewLevelDB(tmpDir) - So(ok, ShouldBeTrue) - ts = NewDefaultLevelDBTripleStore(tmpDir, nil) - ts.AddTripleSet(makeTripleSet()) - - Convey("With an linksto-fixed pair", func() { - fixed := ts.MakeFixed() - fixed.AddValue(ts.GetIdFor("F")) - fixed.AddTag("internal") - lto = graph.NewLinksToIterator(ts, fixed, "o") - - Convey("Creates an appropriate iterator", func() { - oldIt := lto.Clone() - newIt, ok := lto.Optimize() - 
So(ok, ShouldBeTrue) - So(newIt.Type(), ShouldEqual, "leveldb") - - Convey("Containing the right things", func() { - afterOp := extractTripleFromIterator(ts, newIt) - beforeOp := extractTripleFromIterator(ts, oldIt) - sort.Strings(afterOp) - sort.Strings(beforeOp) - So(afterOp, ShouldResemble, beforeOp) - }) - - Convey("With the correct tags", func() { - oldIt.Next() - newIt.Next() - oldResults := make(map[string]graph.TSVal) - oldIt.TagResults(&oldResults) - newResults := make(map[string]graph.TSVal) - oldIt.TagResults(&newResults) - So(newResults, ShouldResemble, oldResults) - }) - - }) - - }) - - }) - -} diff --git a/src/graph_memstore/llrb-iterator.go b/src/graph_memstore/llrb-iterator.go deleted file mode 100644 index fed4272..0000000 --- a/src/graph_memstore/llrb-iterator.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_memstore - -import ( - "fmt" - "github.com/petar/GoLLRB/llrb" - "graph" - "math" - "strings" -) - -type LlrbIterator struct { - graph.BaseIterator - tree *llrb.LLRB - data string - isRunning bool - iterLast Int64 -} - -type Int64 int64 - -func (i Int64) Less(than llrb.Item) bool { - return i < than.(Int64) -} - -func IterateOne(tree *llrb.LLRB, last Int64) Int64 { - var next Int64 - tree.AscendGreaterOrEqual(last, func(i llrb.Item) bool { - if i.(Int64) == last { - return true - } else { - next = i.(Int64) - return false - } - }) - return next -} - -func NewLlrbIterator(tree *llrb.LLRB, data string) *LlrbIterator { - var it LlrbIterator - graph.BaseIteratorInit(&it.BaseIterator) - it.tree = tree - it.iterLast = Int64(-1) - it.data = data - return &it -} - -func (it *LlrbIterator) Reset() { - it.iterLast = Int64(-1) -} - -func (it *LlrbIterator) Clone() graph.Iterator { - var new_it = NewLlrbIterator(it.tree, it.data) - new_it.CopyTagsFrom(it) - return new_it -} - -func (it *LlrbIterator) Close() {} - -func (it *LlrbIterator) Next() (graph.TSVal, bool) { - graph.NextLogIn(it) - if it.tree.Max() == nil || it.Last == int64(it.tree.Max().(Int64)) { - return graph.NextLogOut(it, nil, false) - } - it.iterLast = IterateOne(it.tree, it.iterLast) - it.Last = int64(it.iterLast) - return graph.NextLogOut(it, it.Last, true) -} - -func (it *LlrbIterator) Size() (int64, bool) { - return int64(it.tree.Len()), true -} - -func (it *LlrbIterator) Check(v graph.TSVal) bool { - graph.CheckLogIn(it, v) - if it.tree.Has(Int64(v.(int64))) { - it.Last = v - return graph.CheckLogOut(it, v, true) - } - return graph.CheckLogOut(it, v, false) -} - -func (it *LlrbIterator) DebugString(indent int) string { - size, _ := it.Size() - return fmt.Sprintf("%s(%s tags:%s size:%d %s)", strings.Repeat(" ", indent), it.Type(), it.Tags(), size, it.data) -} - -func (it *LlrbIterator) Type() string { - return "llrb" -} -func (it *LlrbIterator) Sorted() bool { - return true -} -func (it 
*LlrbIterator) Optimize() (graph.Iterator, bool) { - return it, false -} - -func (it *LlrbIterator) GetStats() *graph.IteratorStats { - return &graph.IteratorStats{ - CheckCost: int64(math.Log(float64(it.tree.Len()))) + 1, - NextCost: 1, - Size: int64(it.tree.Len()), - } -} diff --git a/src/graph_memstore/memstore-all-iterator.go b/src/graph_memstore/memstore-all-iterator.go deleted file mode 100644 index 21245a0..0000000 --- a/src/graph_memstore/memstore-all-iterator.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_memstore - -import ( - "graph" -) - -type MemstoreAllIterator struct { - graph.Int64AllIterator - ts *MemTripleStore -} - -func NewMemstoreAllIterator(ts *MemTripleStore) *MemstoreAllIterator { - var out MemstoreAllIterator - out.Int64AllIterator = *graph.NewInt64AllIterator(1, ts.idCounter-1) - out.ts = ts - return &out -} - -func (memall *MemstoreAllIterator) Next() (graph.TSVal, bool) { - next, out := memall.Int64AllIterator.Next() - if !out { - return next, out - } - i64 := next.(int64) - _, ok := memall.ts.revIdMap[i64] - if !ok { - return memall.Next() - } - memall.Last = next - return next, out -} diff --git a/src/graph_memstore/memtriplestore-iterator-optimize.go b/src/graph_memstore/memtriplestore-iterator-optimize.go deleted file mode 100644 index 2fd913b..0000000 --- a/src/graph_memstore/memtriplestore-iterator-optimize.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_memstore - -import ( - "graph" -) - -func (ts *MemTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { - switch it.Type() { - case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) - - } - return it, false -} - -func (ts *MemTripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { - l := it.GetSubIterators() - if l.Len() != 1 { - return it, false - } - primaryIt := l.Front().Value.(graph.Iterator) - if primaryIt.Type() == "fixed" { - size, _ := primaryIt.Size() - if size == 1 { - val, ok := primaryIt.Next() - if !ok { - panic("Sizes lie") - } - newIt := ts.GetTripleIterator(it.Direction(), val) - newIt.CopyTagsFrom(it) - for _, tag := range primaryIt.Tags() { - newIt.AddFixedTag(tag, val) - } - return newIt, true - } - } - it.Close() - return it, false -} diff --git a/src/graph_memstore/memtriplestore.go b/src/graph_memstore/memtriplestore.go deleted file mode 100644 index 83f5c17..0000000 --- a/src/graph_memstore/memtriplestore.go +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_memstore - -import ( - "fmt" - "github.com/barakmich/glog" - "github.com/petar/GoLLRB/llrb" - "graph" -) - -type TripleDirectionIndex struct { - subject map[int64]*llrb.LLRB - predicate map[int64]*llrb.LLRB - object map[int64]*llrb.LLRB - provenance map[int64]*llrb.LLRB -} - -func NewTripleDirectionIndex() *TripleDirectionIndex { - var tdi TripleDirectionIndex - tdi.subject = make(map[int64]*llrb.LLRB) - tdi.predicate = make(map[int64]*llrb.LLRB) - tdi.object = make(map[int64]*llrb.LLRB) - tdi.provenance = make(map[int64]*llrb.LLRB) - return &tdi -} - -func (tdi *TripleDirectionIndex) GetForDir(s string) map[int64]*llrb.LLRB { - if s == "s" { - return tdi.subject - } else if s == "o" { - return tdi.object - } else if s == "p" { - return tdi.predicate - } else if s == "c" { - return tdi.provenance - } - panic("Bad direction") -} - -func (tdi *TripleDirectionIndex) GetOrCreate(dir string, id int64) *llrb.LLRB { - directionIndex := tdi.GetForDir(dir) - if _, ok := directionIndex[id]; !ok { - directionIndex[id] = llrb.New() - } - return directionIndex[id] -} - -func (tdi *TripleDirectionIndex) Get(dir string, id int64) (*llrb.LLRB, bool) { - directionIndex := tdi.GetForDir(dir) - tree, exists := directionIndex[id] - return tree, exists -} - -type MemTripleStore struct { - idCounter int64 - tripleIdCounter int64 - idMap map[string]int64 - revIdMap map[int64]string - triples []graph.Triple - size int64 - index TripleDirectionIndex - // vip_index map[string]map[int64]map[string]map[int64]*llrb.Tree -} - -func NewMemTripleStore() *MemTripleStore { - var ts MemTripleStore - ts.idMap = make(map[string]int64) - ts.revIdMap = make(map[int64]string) - ts.triples = make([]graph.Triple, 1, 200) - - // Sentinel null triple so triple indices start at 1 - ts.triples[0] = graph.Triple{} - ts.size = 1 - ts.index = *NewTripleDirectionIndex() - ts.idCounter = 1 - ts.tripleIdCounter = 1 - return &ts -} - -func (ts *MemTripleStore) AddTripleSet(triples []*graph.Triple) { 
- for _, t := range triples { - ts.AddTriple(t) - } -} - -func (ts *MemTripleStore) tripleExists(t *graph.Triple) (bool, int64) { - smallest := -1 - var smallest_tree *llrb.LLRB - for _, dir := range graph.TripleDirections { - sid := t.Get(dir) - if dir == "c" && sid == "" { - continue - } - id, ok := ts.idMap[sid] - // If we've never heard about a node, it most not exist - if !ok { - return false, 0 - } - index, exists := ts.index.Get(dir, id) - if !exists { - // If it's never been indexed in this direction, it can't exist. - return false, 0 - } - if smallest == -1 || index.Len() < smallest { - smallest = index.Len() - smallest_tree = index - } - } - it := NewLlrbIterator(smallest_tree, "") - - for { - val, ok := it.Next() - if !ok { - break - } - if t.Equals(&ts.triples[val.(int64)]) { - return true, val.(int64) - } - } - return false, 0 -} - -func (ts *MemTripleStore) AddTriple(t *graph.Triple) { - if exists, _ := ts.tripleExists(t); exists { - return - } - var tripleID int64 - ts.triples = append(ts.triples, *t) - tripleID = ts.tripleIdCounter - ts.size++ - ts.tripleIdCounter++ - - for _, dir := range graph.TripleDirections { - sid := t.Get(dir) - if dir == "c" && sid == "" { - continue - } - if _, ok := ts.idMap[sid]; !ok { - ts.idMap[sid] = ts.idCounter - ts.revIdMap[ts.idCounter] = sid - ts.idCounter++ - } - } - - for _, dir := range graph.TripleDirections { - if dir == "c" && t.Get(dir) == "" { - continue - } - id := ts.idMap[t.Get(dir)] - tree := ts.index.GetOrCreate(dir, id) - tree.ReplaceOrInsert(Int64(tripleID)) - } - - // TODO(barakmich): Add VIP indexing -} - -func (ts *MemTripleStore) RemoveTriple(t *graph.Triple) { - var tripleID int64 - var exists bool - tripleID = 0 - if exists, tripleID = ts.tripleExists(t); !exists { - return - } - - ts.triples[tripleID] = graph.Triple{} - ts.size-- - - for _, dir := range graph.TripleDirections { - if dir == "c" && t.Get(dir) == "" { - continue - } - id := ts.idMap[t.Get(dir)] - tree := 
ts.index.GetOrCreate(dir, id) - tree.Delete(Int64(tripleID)) - } - - for _, dir := range graph.TripleDirections { - if dir == "c" && t.Get(dir) == "" { - continue - } - id, ok := ts.idMap[t.Get(dir)] - if !ok { - continue - } - stillExists := false - for _, dir := range graph.TripleDirections { - if dir == "c" && t.Get(dir) == "" { - continue - } - nodeTree := ts.index.GetOrCreate(dir, id) - if nodeTree.Len() != 0 { - stillExists = true - break - } - } - if !stillExists { - delete(ts.idMap, t.Get(dir)) - delete(ts.revIdMap, id) - } - } -} - -func (ts *MemTripleStore) GetTriple(index graph.TSVal) *graph.Triple { - return &ts.triples[index.(int64)] -} - -func (ts *MemTripleStore) GetTripleIterator(direction string, value graph.TSVal) graph.Iterator { - index, ok := ts.index.Get(direction, value.(int64)) - data := fmt.Sprintf("dir:%s val:%d", direction, value.(int64)) - if ok { - return NewLlrbIterator(index, data) - } - return &graph.NullIterator{} -} - -func (ts *MemTripleStore) Size() int64 { - return ts.size - 1 // Don't count the sentinel -} - -func (ts *MemTripleStore) DebugPrint() { - for i, t := range ts.triples { - if i == 0 { - continue - } - glog.V(2).Infoln("%d: %s", i, t.ToString()) - } -} - -func (ts *MemTripleStore) GetIdFor(name string) graph.TSVal { - return ts.idMap[name] -} - -func (ts *MemTripleStore) GetNameFor(id graph.TSVal) string { - return ts.revIdMap[id.(int64)] -} - -func (ts *MemTripleStore) GetTriplesAllIterator() graph.Iterator { - return graph.NewInt64AllIterator(0, ts.Size()) -} - -func (ts *MemTripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(graph.BasicEquality) -} - -func (ts *MemTripleStore) GetTripleDirection(val graph.TSVal, direction string) graph.TSVal { - name := ts.GetTriple(val).Get(direction) - return ts.GetIdFor(name) -} - -func (ts *MemTripleStore) GetNodesAllIterator() graph.Iterator { - return NewMemstoreAllIterator(ts) -} -func (ts *MemTripleStore) Close() {} diff --git 
a/src/graph_memstore/memtriplestore_test.go b/src/graph_memstore/memtriplestore_test.go deleted file mode 100644 index 0321028..0000000 --- a/src/graph_memstore/memtriplestore_test.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph_memstore - -import ( - . "github.com/smartystreets/goconvey/convey" - "graph" - "sort" - "testing" -) - -func TestMemstore(t *testing.T) { - Convey("With a simple memstore", t, func() { - ts := MakeTestingMemstore() - Convey("It should have a reasonable size", func() { - So(ts.Size(), ShouldEqual, 11) - }) - Convey("It should have an Id Space that makes sense", func() { - v := ts.GetIdFor("C") - So(v.(int64), ShouldEqual, 4) - }) - }) -} - -func TestIteratorsAndNextResultOrderA(t *testing.T) { - ts := MakeTestingMemstore() - fixed := ts.MakeFixed() - fixed.AddValue(ts.GetIdFor("C")) - all := ts.GetNodesAllIterator() - lto := graph.NewLinksToIterator(ts, all, "o") - innerAnd := graph.NewAndIterator() - - fixed2 := ts.MakeFixed() - fixed2.AddValue(ts.GetIdFor("follows")) - lto2 := graph.NewLinksToIterator(ts, fixed2, "p") - innerAnd.AddSubIterator(lto2) - innerAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(ts, innerAnd, "s") - outerAnd := graph.NewAndIterator() - outerAnd.AddSubIterator(fixed) - outerAnd.AddSubIterator(hasa) - val, ok := outerAnd.Next() - if !ok { - t.Error("Expected one matching subtree") - } - if 
ts.GetNameFor(val) != "C" { - t.Errorf("Matching subtree should be %s, got %s", "barak", ts.GetNameFor(val)) - } - expected := make([]string, 2) - expected[0] = "B" - expected[1] = "D" - actualOut := make([]string, 2) - actualOut[0] = ts.GetNameFor(all.LastResult()) - nresultOk := outerAnd.NextResult() - if !nresultOk { - t.Error("Expected two results got one") - } - actualOut[1] = ts.GetNameFor(all.LastResult()) - nresultOk = outerAnd.NextResult() - if nresultOk { - t.Error("Expected two results got three") - } - CompareStringSlices(t, expected, actualOut) - val, ok = outerAnd.Next() - if ok { - t.Error("More than one possible top level output?") - } -} - -func CompareStringSlices(t *testing.T, expected []string, actual []string) { - if len(expected) != len(actual) { - t.Error("String slices are not the same length") - } - sort.Strings(expected) - sort.Strings(actual) - for i := 0; i < len(expected); i++ { - if expected[i] != actual[i] { - t.Errorf("At index %d, expected \"%s\" and got \"%s\"", i, expected[i], actual[i]) - } - } -} - -func TestLinksToOptimization(t *testing.T) { - ts := MakeTestingMemstore() - fixed := ts.MakeFixed() - fixed.AddValue(ts.GetIdFor("cool")) - lto := graph.NewLinksToIterator(ts, fixed, "o") - lto.AddTag("foo") - newIt, changed := lto.Optimize() - if !changed { - t.Error("Iterator didn't change") - } - if newIt.Type() != "llrb" { - t.Fatal("Didn't swap out to LLRB") - } - v := newIt.(*LlrbIterator) - v_clone := v.Clone() - if v_clone.DebugString(0) != v.DebugString(0) { - t.Fatal("Wrong iterator. 
Got ", v_clone.DebugString(0)) - } - if len(v_clone.Tags()) < 1 || v_clone.Tags()[0] != "foo" { - t.Fatal("Tag on LinksTo did not persist") - } -} - -func TestRemoveTriple(t *testing.T) { - ts := MakeTestingMemstore() - ts.RemoveTriple(graph.MakeTriple("E", "follows", "F", "")) - fixed := ts.MakeFixed() - fixed.AddValue(ts.GetIdFor("E")) - lto := graph.NewLinksToIterator(ts, fixed, "s") - fixed2 := ts.MakeFixed() - fixed2.AddValue(ts.GetIdFor("follows")) - lto2 := graph.NewLinksToIterator(ts, fixed2, "p") - innerAnd := graph.NewAndIterator() - innerAnd.AddSubIterator(lto2) - innerAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(ts, innerAnd, "o") - newIt, _ := hasa.Optimize() - _, ok := newIt.Next() - if ok { - t.Error("E should not have any followers.") - } -} diff --git a/src/graph_memstore/testing_memstore.go b/src/graph_memstore/testing_memstore.go deleted file mode 100644 index d32d838..0000000 --- a/src/graph_memstore/testing_memstore.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_memstore - -import "graph" - -// +---+ +---+ -// | A |------- ->| F |<-- -// +---+ \------>+---+-/ +---+ \--+---+ -// ------>|#B#| | | E | -// +---+-------/ >+---+ | +---+ -// | C | / v -// +---+ -/ +---+ -// ---- +---+/ |#G#| -// \-->|#D#|------------->+---+ -// +---+ -// - -func MakeTestingMemstore() *MemTripleStore { - ts := NewMemTripleStore() - ts.AddTriple(graph.MakeTriple("A", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("C", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("C", "follows", "D", "")) - ts.AddTriple(graph.MakeTriple("D", "follows", "B", "")) - ts.AddTriple(graph.MakeTriple("B", "follows", "F", "")) - ts.AddTriple(graph.MakeTriple("F", "follows", "G", "")) - ts.AddTriple(graph.MakeTriple("D", "follows", "G", "")) - ts.AddTriple(graph.MakeTriple("E", "follows", "F", "")) - ts.AddTriple(graph.MakeTriple("B", "status", "cool", "status_graph")) - ts.AddTriple(graph.MakeTriple("D", "status", "cool", "status_graph")) - ts.AddTriple(graph.MakeTriple("G", "status", "cool", "status_graph")) - return ts -} diff --git a/src/graph_mongo/lru.go b/src/graph_mongo/lru.go deleted file mode 100644 index c856a5a..0000000 --- a/src/graph_mongo/lru.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_mongo - -import ( - "container/list" -) - -type IDLru struct { - cache map[string]*list.Element - priority *list.List - maxSize int -} - -type KV struct { - key string - value string -} - -func NewIDLru(size int) *IDLru { - var lru IDLru - lru.maxSize = size - lru.priority = list.New() - lru.cache = make(map[string]*list.Element) - return &lru -} - -func (lru *IDLru) Put(key string, value string) { - if _, ok := lru.Get(key); ok { - return - } - if len(lru.cache) == lru.maxSize { - lru.removeOldest() - } - lru.priority.PushFront(KV{key: key, value: value}) - lru.cache[key] = lru.priority.Front() -} - -func (lru *IDLru) Get(key string) (string, bool) { - if element, ok := lru.cache[key]; ok { - lru.priority.MoveToFront(element) - return element.Value.(KV).value, true - } - return "", false -} - -func (lru *IDLru) removeOldest() { - last := lru.priority.Remove(lru.priority.Back()) - delete(lru.cache, last.(KV).key) -} diff --git a/src/graph_mongo/mongo-iterator.go b/src/graph_mongo/mongo-iterator.go deleted file mode 100644 index 6d7f84a..0000000 --- a/src/graph_mongo/mongo-iterator.go +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_mongo - -import ( - "fmt" - "github.com/barakmich/glog" - "graph" - "labix.org/v2/mgo" - "labix.org/v2/mgo/bson" - "strings" -) - -type MongoIterator struct { - graph.BaseIterator - ts *MongoTripleStore - dir string - iter *mgo.Iter - hash string - name string - size int64 - isAll bool - constraint bson.M - collection string -} - -func NewMongoIterator(ts *MongoTripleStore, collection string, dir string, val graph.TSVal) *MongoIterator { - var m MongoIterator - graph.BaseIteratorInit(&m.BaseIterator) - - m.name = ts.GetNameFor(val) - m.collection = collection - switch dir { - - case "s": - m.constraint = bson.M{"Sub": m.name} - case "p": - m.constraint = bson.M{"Pred": m.name} - case "o": - m.constraint = bson.M{"Obj": m.name} - case "c": - m.constraint = bson.M{"Provenance": m.name} - } - - m.ts = ts - m.dir = dir - m.iter = ts.db.C(collection).Find(m.constraint).Iter() - size, err := ts.db.C(collection).Find(m.constraint).Count() - if err != nil { - glog.Errorln("Trouble getting size for iterator! ", err) - return nil - } - m.size = int64(size) - m.hash = val.(string) - m.isAll = false - return &m -} - -func NewMongoAllIterator(ts *MongoTripleStore, collection string) *MongoIterator { - var m MongoIterator - m.ts = ts - m.dir = "all" - m.constraint = nil - m.collection = collection - m.iter = ts.db.C(collection).Find(nil).Iter() - size, err := ts.db.C(collection).Count() - if err != nil { - glog.Errorln("Trouble getting size for iterator! 
", err) - return nil - } - m.size = int64(size) - m.hash = "" - m.isAll = true - return &m -} - -func (m *MongoIterator) Reset() { - m.iter.Close() - m.iter = m.ts.db.C(m.collection).Find(m.constraint).Iter() - -} - -func (m *MongoIterator) Close() { - m.iter.Close() -} - -func (m *MongoIterator) Clone() graph.Iterator { - var newM graph.Iterator - if m.isAll { - newM = NewMongoAllIterator(m.ts, m.collection) - } else { - newM = NewMongoIterator(m.ts, m.collection, m.dir, m.hash) - } - newM.CopyTagsFrom(m) - return newM -} - -func (m *MongoIterator) Next() (graph.TSVal, bool) { - var result struct { - Id string "_id" - //Sub string "Sub" - //Pred string "Pred" - //Obj string "Obj" - } - found := m.iter.Next(&result) - if !found { - err := m.iter.Err() - if err != nil { - glog.Errorln("Error Nexting MongoIterator: ", err) - } - return nil, false - } - m.Last = result.Id - return result.Id, true -} - -func (m *MongoIterator) Check(v graph.TSVal) bool { - graph.CheckLogIn(m, v) - if m.isAll { - m.Last = v - return graph.CheckLogOut(m, v, true) - } - var offset int - switch m.dir { - case "s": - offset = 0 - case "p": - offset = (m.ts.hasher.Size() * 2) - case "o": - offset = (m.ts.hasher.Size() * 2) * 2 - case "c": - offset = (m.ts.hasher.Size() * 2) * 3 - } - val := v.(string)[offset : m.ts.hasher.Size()*2+offset] - if val == m.hash { - m.Last = v - return graph.CheckLogOut(m, v, true) - } - return graph.CheckLogOut(m, v, false) -} - -func (m *MongoIterator) Size() (int64, bool) { - return m.size, true -} - -func (m *MongoIterator) Type() string { - if m.isAll { - return "all" - } - return "mongo" -} -func (m *MongoIterator) Sorted() bool { return true } -func (m *MongoIterator) Optimize() (graph.Iterator, bool) { return m, false } - -func (m *MongoIterator) DebugString(indent int) string { - size, _ := m.Size() - return fmt.Sprintf("%s(%s size:%d %s %s)", strings.Repeat(" ", indent), m.Type(), size, m.hash, m.name) -} - -func (m *MongoIterator) GetStats() 
*graph.IteratorStats { - size, _ := m.Size() - return &graph.IteratorStats{ - CheckCost: 1, - NextCost: 5, - Size: size, - } -} diff --git a/src/graph_mongo/mongo-triplestore-iterator-optimize.go b/src/graph_mongo/mongo-triplestore-iterator-optimize.go deleted file mode 100644 index ea35602..0000000 --- a/src/graph_mongo/mongo-triplestore-iterator-optimize.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_mongo - -import ( - "graph" -) - -func (ts *MongoTripleStore) OptimizeIterator(it graph.Iterator) (graph.Iterator, bool) { - switch it.Type() { - case "linksto": - return ts.optimizeLinksTo(it.(*graph.LinksToIterator)) - - } - return it, false -} - -func (ts *MongoTripleStore) optimizeLinksTo(it *graph.LinksToIterator) (graph.Iterator, bool) { - l := it.GetSubIterators() - if l.Len() != 1 { - return it, false - } - primaryIt := l.Front().Value.(graph.Iterator) - if primaryIt.Type() == "fixed" { - size, _ := primaryIt.Size() - if size == 1 { - val, ok := primaryIt.Next() - if !ok { - panic("Sizes lie") - } - newIt := ts.GetTripleIterator(it.Direction(), val) - newIt.CopyTagsFrom(it) - for _, tag := range primaryIt.Tags() { - newIt.AddFixedTag(tag, val) - } - it.Close() - return newIt, true - } - } - return it, false -} diff --git a/src/graph_mongo/mongo-triplestore.go b/src/graph_mongo/mongo-triplestore.go deleted file mode 100644 index fa4ea25..0000000 --- a/src/graph_mongo/mongo-triplestore.go +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_mongo - -import ( - "crypto/sha1" - "encoding/hex" - "github.com/barakmich/glog" - "graph" - "hash" - "labix.org/v2/mgo" - "labix.org/v2/mgo/bson" - "log" -) - -const DefaultDBName = "cayley" - -type MongoTripleStore struct { - session *mgo.Session - db *mgo.Database - hasher hash.Hash - idCache *IDLru -} - -func CreateNewMongoGraph(addr string, options graph.OptionsDict) bool { - conn, err := mgo.Dial(addr) - if err != nil { - glog.Fatal("Error connecting: ", err) - return false - } - conn.SetSafe(&mgo.Safe{}) - dbName := DefaultDBName - if val, ok := options.GetStringKey("database_name"); ok { - dbName = val - } - db := conn.DB(dbName) - indexOpts := mgo.Index{ - Key: []string{"Sub"}, - Unique: false, - DropDups: false, - Background: true, - Sparse: true, - } - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Pred"} - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Obj"} - db.C("triples").EnsureIndex(indexOpts) - indexOpts.Key = []string{"Provenance"} - db.C("triples").EnsureIndex(indexOpts) - return true -} - -func NewMongoTripleStore(addr string, options graph.OptionsDict) *MongoTripleStore { - var ts MongoTripleStore - conn, err := mgo.Dial(addr) - if err != nil { - glog.Fatal("Error connecting: ", err) - } - conn.SetSafe(&mgo.Safe{}) - dbName := DefaultDBName - if val, ok := options.GetStringKey("database_name"); ok { - dbName = val - } - ts.db = conn.DB(dbName) - ts.session = conn - ts.hasher = sha1.New() - ts.idCache = NewIDLru(1 << 16) - return &ts -} - -func (ts *MongoTripleStore) getIdForTriple(t *graph.Triple) string { - id := ts.ConvertStringToByteHash(t.Sub) - id += ts.ConvertStringToByteHash(t.Pred) - id += ts.ConvertStringToByteHash(t.Obj) - id += ts.ConvertStringToByteHash(t.Provenance) - return id -} - -func (ts *MongoTripleStore) ConvertStringToByteHash(s string) string { - ts.hasher.Reset() - key := make([]byte, 0, ts.hasher.Size()) - ts.hasher.Write([]byte(s)) - key = ts.hasher.Sum(key) - 
return hex.EncodeToString(key) -} - -type MongoNode struct { - Id string "_id" - Name string "Name" - Size int "Size" -} - -func (ts *MongoTripleStore) updateNodeBy(node_name string, inc int) { - var size MongoNode - node := ts.GetIdFor(node_name) - err := ts.db.C("nodes").FindId(node).One(&size) - if err != nil { - if err.Error() == "not found" { - // Not found. Okay. - size.Id = node.(string) - size.Name = node_name - size.Size = inc - } else { - glog.Error("Error:", err) - return - } - } else { - size.Id = node.(string) - size.Name = node_name - size.Size += inc - } - - // Removing something... - if inc < 0 { - if size.Size <= 0 { - err := ts.db.C("nodes").RemoveId(node) - if err != nil { - glog.Error("Error: ", err, " while removing node ", node_name) - return - } - } - } - - _, err2 := ts.db.C("nodes").UpsertId(node, size) - if err2 != nil { - glog.Error("Error: ", err) - } -} - -func (ts *MongoTripleStore) writeTriple(t *graph.Triple) bool { - tripledoc := bson.M{"_id": ts.getIdForTriple(t), "Sub": t.Sub, "Pred": t.Pred, "Obj": t.Obj, "Provenance": t.Provenance} - err := ts.db.C("triples").Insert(tripledoc) - if err != nil { - // Among the reasons I hate MongoDB. "Errors don't happen! Right guys?" 
- if err.(*mgo.LastError).Code == 11000 { - return false - } - glog.Error("Error: ", err) - return false - } - return true -} - -func (ts *MongoTripleStore) AddTriple(t *graph.Triple) { - _ = ts.writeTriple(t) - ts.updateNodeBy(t.Sub, 1) - ts.updateNodeBy(t.Pred, 1) - ts.updateNodeBy(t.Obj, 1) - if t.Provenance != "" { - ts.updateNodeBy(t.Provenance, 1) - } -} - -func (ts *MongoTripleStore) AddTripleSet(in []*graph.Triple) { - ts.session.SetSafe(nil) - idMap := make(map[string]int) - for _, t := range in { - wrote := ts.writeTriple(t) - if wrote { - idMap[t.Sub]++ - idMap[t.Obj]++ - idMap[t.Pred]++ - if t.Provenance != "" { - idMap[t.Provenance]++ - } - } - } - for k, v := range idMap { - ts.updateNodeBy(k, v) - } - ts.session.SetSafe(&mgo.Safe{}) -} - -func (ts *MongoTripleStore) RemoveTriple(t *graph.Triple) { - err := ts.db.C("triples").RemoveId(ts.getIdForTriple(t)) - if err == mgo.ErrNotFound { - return - } else if err != nil { - log.Println("Error: ", err, " while removing triple ", t) - return - } - ts.updateNodeBy(t.Sub, -1) - ts.updateNodeBy(t.Pred, -1) - ts.updateNodeBy(t.Obj, -1) - if t.Provenance != "" { - ts.updateNodeBy(t.Provenance, -1) - } -} - -func (ts *MongoTripleStore) GetTriple(val graph.TSVal) *graph.Triple { - var bsonDoc bson.M - err := ts.db.C("triples").FindId(val.(string)).One(&bsonDoc) - if err != nil { - log.Println("Error: Couldn't retrieve triple", val.(string), err) - } - return graph.MakeTriple( - bsonDoc["Sub"].(string), - bsonDoc["Pred"].(string), - bsonDoc["Obj"].(string), - bsonDoc["Provenance"].(string)) -} - -func (ts *MongoTripleStore) GetTripleIterator(dir string, val graph.TSVal) graph.Iterator { - return NewMongoIterator(ts, "triples", dir, val) -} - -func (ts *MongoTripleStore) GetNodesAllIterator() graph.Iterator { - return NewMongoAllIterator(ts, "nodes") -} - -func (ts *MongoTripleStore) GetTriplesAllIterator() graph.Iterator { - return NewMongoAllIterator(ts, "triples") -} - -func (ts *MongoTripleStore) GetIdFor(s 
string) graph.TSVal { - return ts.ConvertStringToByteHash(s) -} - -func (ts *MongoTripleStore) GetNameFor(v graph.TSVal) string { - val, ok := ts.idCache.Get(v.(string)) - if ok { - return val - } - var node MongoNode - err := ts.db.C("nodes").FindId(v.(string)).One(&node) - if err != nil { - log.Println("Error: Couldn't retrieve node", v.(string), err) - } - ts.idCache.Put(v.(string), node.Name) - return node.Name -} - -func (ts *MongoTripleStore) Size() int64 { - count, err := ts.db.C("triples").Count() - if err != nil { - glog.Error("Error: ", err) - return 0 - } - return int64(count) -} - -func compareStrings(a, b graph.TSVal) bool { - return a.(string) == b.(string) -} - -func (ts *MongoTripleStore) MakeFixed() *graph.FixedIterator { - return graph.NewFixedIteratorWithCompare(compareStrings) -} - -func (ts *MongoTripleStore) Close() { - ts.db.Session.Close() -} - -func (ts *MongoTripleStore) GetTripleDirection(in graph.TSVal, dir string) graph.TSVal { - // Maybe do the trick here - var offset int - switch dir { - case "s": - offset = 0 - case "p": - offset = (ts.hasher.Size() * 2) - case "o": - offset = (ts.hasher.Size() * 2) * 2 - case "c": - offset = (ts.hasher.Size() * 2) * 3 - } - val := in.(string)[offset : ts.hasher.Size()*2+offset] - return val -} - -func (ts *MongoTripleStore) BulkLoad(t_chan chan *graph.Triple) { - ts.session.SetSafe(nil) - for triple := range t_chan { - ts.writeTriple(triple) - } - outputTo := bson.M{"replace": "nodes", "sharded": true} - glog.Infoln("Mapreducing") - job := mgo.MapReduce{ - Map: `function() { - var len = this["_id"].length - var s_key = this["_id"].slice(0, len / 4) - var p_key = this["_id"].slice(len / 4, 2 * len / 4) - var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) - var c_key = this["_id"].slice(3 * len / 4) - emit(s_key, {"_id": s_key, "Name" : this.Sub, "Size" : 1}) - emit(p_key, {"_id": p_key, "Name" : this.Pred, "Size" : 1}) - emit(o_key, {"_id": o_key, "Name" : this.Obj, "Size" : 1}) - if 
(this.Provenance != "") { - emit(c_key, {"_id": c_key, "Name" : this.Provenance, "Size" : 1}) - } - } - `, - Reduce: ` - function(key, value_list) { - out = {"_id": key, "Name": value_list[0].Name} - count = 0 - for (var i = 0; i < value_list.length; i++) { - count = count + value_list[i].Size - - } - out["Size"] = count - return out - } - `, - Out: outputTo, - } - ts.db.C("triples").Find(nil).MapReduce(&job, nil) - glog.Infoln("Fixing") - ts.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) { - db.nodes.update({"_id": result._id}, result.value) - }) }`}, {"args", bson.D{}}}, nil) - - ts.session.SetSafe(&mgo.Safe{}) -} diff --git a/src/graph_sexp/parser.go b/src/graph_sexp/parser.go deleted file mode 100644 index 529b854..0000000 --- a/src/graph_sexp/parser.go +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package graph_sexp - -import ( - "github.com/badgerodon/peg" - "graph" -) - -func BuildIteratorTreeForQuery(ts graph.TripleStore, query string) graph.Iterator { - tree := parseQuery(query) - return buildIteratorTree(tree, ts) -} - -func ParseString(input string) string { - return parseQuery(input).String() -} - -func parseQuery(input string) *peg.ExpressionTree { - parser := peg.NewParser() - - start := parser.NonTerminal("Start") - whitespace := parser.NonTerminal("Whitespace") - quotedString := parser.NonTerminal("QuotedString") - rootConstraint := parser.NonTerminal("RootConstraint") - - constraint := parser.NonTerminal("Constraint") - colonIdentifier := parser.NonTerminal("ColonIdentifier") - variable := parser.NonTerminal("Variable") - identifier := parser.NonTerminal("Identifier") - fixedNode := parser.NonTerminal("FixedNode") - nodeIdent := parser.NonTerminal("NodeIdentifier") - predIdent := parser.NonTerminal("PredIdentifier") - reverse := parser.NonTerminal("Reverse") - predKeyword := parser.NonTerminal("PredicateKeyword") - optional := parser.NonTerminal("OptionalKeyword") - - start.Expression = rootConstraint - - whitespace.Expression = parser.OneOrMore( - parser.OrderedChoice( - parser.Terminal(' '), - parser.Terminal('\t'), - parser.Terminal('\n'), - parser.Terminal('\r'), - ), - ) - - quotedString.Expression = parser.Sequence( - parser.Terminal('"'), - parser.OneOrMore( - parser.OrderedChoice( - parser.Range('0', '9'), - parser.Range('a', 'z'), - parser.Range('A', 'Z'), - parser.Terminal('_'), - parser.Terminal('/'), - parser.Terminal(':'), - parser.Terminal(' '), - parser.Terminal('\''), - ), - ), - parser.Terminal('"'), - ) - - predKeyword.Expression = parser.OrderedChoice( - optional, - ) - - optional.Expression = parser.Sequence( - parser.Terminal('o'), - parser.Terminal('p'), - parser.Terminal('t'), - parser.Terminal('i'), - parser.Terminal('o'), - parser.Terminal('n'), - parser.Terminal('a'), - parser.Terminal('l'), - ) - - 
identifier.Expression = parser.OneOrMore( - parser.OrderedChoice( - parser.Range('0', '9'), - parser.Range('a', 'z'), - parser.Range('A', 'Z'), - parser.Terminal('_'), - parser.Terminal('.'), - parser.Terminal('/'), - parser.Terminal(':'), - parser.Terminal('#'), - ), - ) - - reverse.Expression = parser.Terminal('!') - - variable.Expression = parser.Sequence( - parser.Terminal('$'), - identifier, - ) - - colonIdentifier.Expression = parser.Sequence( - parser.Terminal(':'), - identifier, - ) - - fixedNode.Expression = parser.OrderedChoice( - colonIdentifier, - quotedString, - ) - - nodeIdent.Expression = parser.OrderedChoice( - variable, - fixedNode, - ) - - predIdent.Expression = parser.Sequence( - parser.Optional(reverse), - parser.OrderedChoice( - nodeIdent, - constraint, - ), - ) - - constraint.Expression = parser.Sequence( - parser.Terminal('('), - parser.Optional(whitespace), - predIdent, - parser.Optional(whitespace), - parser.Optional(predKeyword), - parser.Optional(whitespace), - parser.OrderedChoice( - nodeIdent, - rootConstraint, - ), - parser.Optional(whitespace), - parser.Terminal(')'), - ) - - rootConstraint.Expression = parser.Sequence( - parser.Terminal('('), - parser.Optional(whitespace), - nodeIdent, - parser.Optional(whitespace), - parser.ZeroOrMore(parser.Sequence( - constraint, - parser.Optional(whitespace), - )), - parser.Terminal(')'), - ) - - tree := parser.Parse(input) - return tree -} - -func getIdentString(tree *peg.ExpressionTree) string { - out := "" - if len(tree.Children) > 0 { - for _, child := range tree.Children { - out += getIdentString(child) - } - } else { - if tree.Value != '"' { - out += string(tree.Value) - } - } - return out -} - -func buildIteratorTree(tree *peg.ExpressionTree, ts graph.TripleStore) graph.Iterator { - switch tree.Name { - case "Start": - return buildIteratorTree(tree.Children[0], ts) - case "NodeIdentifier": - var out graph.Iterator - nodeID := getIdentString(tree) - if tree.Children[0].Name == "Variable" { 
- allIt := ts.GetNodesAllIterator() - allIt.AddTag(nodeID) - out = allIt - } else { - n := nodeID - if tree.Children[0].Children[0].Name == "ColonIdentifier" { - n = nodeID[1:] - } - fixed := ts.MakeFixed() - fixed.AddValue(ts.GetIdFor(n)) - out = fixed - } - return out - case "PredIdentifier": - i := 0 - if tree.Children[0].Name == "Reverse" { - //Taken care of below - i++ - } - it := buildIteratorTree(tree.Children[i], ts) - lto := graph.NewLinksToIterator(ts, it, "p") - return lto - case "RootConstraint": - constraintCount := 0 - and := graph.NewAndIterator() - for _, c := range tree.Children { - switch c.Name { - case "NodeIdentifier": - fallthrough - case "Constraint": - it := buildIteratorTree(c, ts) - and.AddSubIterator(it) - constraintCount++ - continue - default: - continue - } - } - return and - case "Constraint": - var hasa *graph.HasaIterator - topLevelDir := "s" - subItDir := "o" - subAnd := graph.NewAndIterator() - isOptional := false - for _, c := range tree.Children { - switch c.Name { - case "PredIdentifier": - if c.Children[0].Name == "Reverse" { - topLevelDir = "o" - subItDir = "s" - } - it := buildIteratorTree(c, ts) - subAnd.AddSubIterator(it) - continue - case "PredicateKeyword": - switch c.Children[0].Name { - case "OptionalKeyword": - isOptional = true - } - case "NodeIdentifier": - fallthrough - case "RootConstraint": - it := buildIteratorTree(c, ts) - l := graph.NewLinksToIterator(ts, it, subItDir) - subAnd.AddSubIterator(l) - continue - default: - continue - } - } - hasa = graph.NewHasaIterator(ts, subAnd, topLevelDir) - if isOptional { - optional := graph.NewOptionalIterator(hasa) - return optional - } - return hasa - default: - return &graph.NullIterator{} - } - panic("Not reached") -} diff --git a/src/graph_sexp/parser_test.go b/src/graph_sexp/parser_test.go deleted file mode 100644 index 90233c5..0000000 --- a/src/graph_sexp/parser_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph_sexp - -import ( - . "github.com/smartystreets/goconvey/convey" - "graph" - "graph_memstore" - "testing" -) - -func TestBadParse(t *testing.T) { - str := ParseString("()") - if str != "" { - t.Errorf("It parsed! Got \"%s\"", str) - } -} - -func TestParseSexpWithMemstore(t *testing.T) { - Convey("With a Memstore", t, func() { - ts := graph_memstore.NewMemTripleStore() - - Convey("It should parse an empty query", func() { - it := BuildIteratorTreeForQuery(ts, "()") - So(it.Type(), ShouldEqual, "null") - }) - - Convey("It should get a single triple linkage", func() { - ts.AddTriple(graph.MakeTriple("i", "can", "win", "")) - query := "($a (:can \"win\"))" - So(len(query), ShouldEqual, 17) - it := BuildIteratorTreeForQuery(ts, query) - So(it.Type(), ShouldEqual, "and") - out, ok := it.Next() - So(ok, ShouldBeTrue) - So(out, ShouldEqual, ts.GetIdFor("i")) - }) - - Convey("It can get an internal linkage", func() { - ts.AddTriple(graph.MakeTriple("i", "can", "win", "")) - query := "(\"i\" (:can $a))" - it := BuildIteratorTreeForQuery(ts, query) - So(it.Type(), ShouldEqual, "and") - out, ok := it.Next() - So(ok, ShouldBeTrue) - So(out, ShouldEqual, ts.GetIdFor("i")) - }) - - }) -} - -func TestTreeConstraintParse(t *testing.T) { - ts := graph_memstore.NewMemTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("food", "is", "good", "")) - query := 
"(\"i\"\n" + - "(:like\n" + - "($a (:is :good))))" - it := BuildIteratorTreeForQuery(ts, query) - if it.Type() != "and" { - t.Error("Odd iterator tree. Got: %s", it.DebugString(0)) - } - out, ok := it.Next() - if !ok { - t.Error("Got no results") - } - if out != ts.GetIdFor("i") { - t.Errorf("Got %d, expected %d", out, ts.GetIdFor("i")) - } -} - -func TestTreeConstraintTagParse(t *testing.T) { - ts := graph_memstore.NewMemTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("food", "is", "good", "")) - query := "(\"i\"\n" + - "(:like\n" + - "($a (:is :good))))" - it := BuildIteratorTreeForQuery(ts, query) - _, ok := it.Next() - if !ok { - t.Error("Got no results") - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - if ts.GetNameFor(tags["$a"]) != "food" { - t.Errorf("Got %s, expected food", ts.GetNameFor(tags["$a"])) - } - -} - -func TestMultipleConstraintParse(t *testing.T) { - ts := graph_memstore.NewMemTripleStore() - ts.AddTriple(graph.MakeTriple("i", "like", "food", "")) - ts.AddTriple(graph.MakeTriple("i", "like", "beer", "")) - ts.AddTriple(graph.MakeTriple("you", "like", "beer", "")) - query := "($a \n" + - "(:like :beer)\n" + - "(:like \"food\"))" - it := BuildIteratorTreeForQuery(ts, query) - if it.Type() != "and" { - t.Error("Odd iterator tree. Got: %s", it.DebugString(0)) - } - out, ok := it.Next() - if !ok { - t.Error("Got no results") - } - if out != ts.GetIdFor("i") { - t.Errorf("Got %d, expected %d", out, ts.GetIdFor("i")) - } - _, ok = it.Next() - if ok { - t.Error("Too many results") - } -} diff --git a/src/graph_sexp/sexp-session.go b/src/graph_sexp/sexp-session.go deleted file mode 100644 index 161e5ae..0000000 --- a/src/graph_sexp/sexp-session.go +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package graph_sexp - -// Defines a running session of the sexp query language. - -import ( - "errors" - "fmt" - "graph" - "sort" -) - -type SexpSession struct { - ts graph.TripleStore - debug bool -} - -func NewSexpSession(inputTripleStore graph.TripleStore) *SexpSession { - var s SexpSession - s.ts = inputTripleStore - return &s -} - -func (s *SexpSession) ToggleDebug() { - s.debug = !s.debug -} - -func (s *SexpSession) InputParses(input string) (graph.ParseResult, error) { - var parenDepth int - for i, x := range input { - if x == '(' { - parenDepth++ - } - if x == ')' { - parenDepth-- - if parenDepth < 0 { - min := 0 - if (i - 10) > min { - min = i - 10 - } - return graph.ParseFail, errors.New(fmt.Sprintf("Too many close parens at char %d: %s", i, input[min:i])) - } - } - } - if parenDepth > 0 { - return graph.ParseMore, nil - } - if len(ParseString(input)) > 0 { - return graph.Parsed, nil - } - return graph.ParseFail, errors.New("Invalid Syntax") -} - -func (s *SexpSession) ExecInput(input string, out chan interface{}, limit int) { - it := BuildIteratorTreeForQuery(s.ts, input) - newIt, changed := it.Optimize() - if changed { - it = newIt - } - - if s.debug { - fmt.Println(it.DebugString(0)) - } - nResults := 0 - for { - _, ok := it.Next() - if !ok { - break - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - out <- &tags - nResults++ - if nResults > limit && limit != -1 { - break - } - for it.NextResult() == true { - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - out <- &tags - nResults++ - if nResults > limit 
&& limit != -1 { - break - } - } - } - close(out) -} - -func (s *SexpSession) ToText(result interface{}) string { - out := fmt.Sprintln("****") - tags := result.(*map[string]graph.TSVal) - tagKeys := make([]string, len(*tags)) - i := 0 - for k, _ := range *tags { - tagKeys[i] = k - i++ - } - sort.Strings(tagKeys) - for _, k := range tagKeys { - if k == "$_" { - continue - } - out += fmt.Sprintf("%s : %s\n", k, s.ts.GetNameFor((*tags)[k])) - } - return out -} diff --git a/src/gremlin/gremlin-build-iterator.go b/src/gremlin/gremlin-build-iterator.go deleted file mode 100644 index 823c568..0000000 --- a/src/gremlin/gremlin-build-iterator.go +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package gremlin - -import ( - "github.com/barakmich/glog" - "github.com/robertkrimen/otto" - "graph" - "strconv" -) - -func getStrings(obj *otto.Object, field string) []string { - strings := make([]string, 0) - val, _ := obj.Get(field) - if !val.IsUndefined() { - export, _ := val.Export() - array := export.([]interface{}) - for _, arg := range array { - strings = append(strings, arg.(string)) - } - } - return strings -} - -func getStringArgs(obj *otto.Object) []string { return getStrings(obj, "string_args") } - -func buildIteratorTree(obj *otto.Object, ts graph.TripleStore) graph.Iterator { - if !isVertexChain(obj) { - return graph.NewNullIterator() - } - return buildIteratorTreeHelper(obj, ts, graph.NewNullIterator()) -} - -func makeListOfStringsFromArrayValue(obj *otto.Object) []string { - var output []string - lengthValue, _ := obj.Get("length") - length, _ := lengthValue.ToInteger() - ulength := uint32(length) - for index := uint32(0); index < ulength; index += 1 { - name := strconv.FormatInt(int64(index), 10) - value, err := obj.Get(name) - if err != nil { - continue - } - if !value.IsString() { - continue - } - s, _ := value.ToString() - output = append(output, s) - } - return output -} - -func buildIteratorFromValue(val otto.Value, ts graph.TripleStore) graph.Iterator { - if val.IsNull() || val.IsUndefined() { - return ts.GetNodesAllIterator() - } - if val.IsPrimitive() { - thing, _ := val.Export() - switch v := thing.(type) { - case string: - it := ts.MakeFixed() - it.AddValue(ts.GetIdFor(v)) - return it - default: - glog.Errorln("Trying to build unknown primitive value.") - } - } - switch val.Class() { - case "Object": - return buildIteratorTree(val.Object(), ts) - case "Array": - // Had better be an array of strings - strings := makeListOfStringsFromArrayValue(val.Object()) - it := ts.MakeFixed() - for _, x := range strings { - it.AddValue(ts.GetIdFor(x)) - } - return it - case "Number": - fallthrough - case "Boolean": - fallthrough - case "Date": - 
fallthrough - case "String": - it := ts.MakeFixed() - str, _ := val.ToString() - it.AddValue(ts.GetIdFor(str)) - return it - default: - glog.Errorln("Trying to handle unsupported Javascript value.") - return graph.NewNullIterator() - } -} - -func buildInOutIterator(obj *otto.Object, ts graph.TripleStore, base graph.Iterator, isReverse bool) graph.Iterator { - argList, _ := obj.Get("_gremlin_values") - if argList.Class() != "GoArray" { - glog.Errorln("How is arglist not an array? Return nothing.", argList.Class()) - return graph.NewNullIterator() - } - argArray := argList.Object() - lengthVal, _ := argArray.Get("length") - length, _ := lengthVal.ToInteger() - var predicateNodeIterator graph.Iterator - if length == 0 { - predicateNodeIterator = ts.GetNodesAllIterator() - } else { - zero, _ := argArray.Get("0") - predicateNodeIterator = buildIteratorFromValue(zero, ts) - } - if length >= 2 { - var tags []string - one, _ := argArray.Get("1") - if one.IsString() { - s, _ := one.ToString() - tags = append(tags, s) - } else if one.Class() == "Array" { - tags = makeListOfStringsFromArrayValue(one.Object()) - } - for _, tag := range tags { - predicateNodeIterator.AddTag(tag) - } - } - - in, out := "s", "o" - if isReverse { - in, out = out, in - } - lto := graph.NewLinksToIterator(ts, base, in) - and := graph.NewAndIterator() - and.AddSubIterator(graph.NewLinksToIterator(ts, predicateNodeIterator, "p")) - and.AddSubIterator(lto) - return graph.NewHasaIterator(ts, and, out) -} - -func buildIteratorTreeHelper(obj *otto.Object, ts graph.TripleStore, base graph.Iterator) graph.Iterator { - var it graph.Iterator - it = base - // TODO: Better error handling - kindVal, _ := obj.Get("_gremlin_type") - stringArgs := getStringArgs(obj) - var subIt graph.Iterator - prevVal, _ := obj.Get("_gremlin_prev") - if !prevVal.IsObject() { - subIt = base - } else { - subIt = buildIteratorTreeHelper(prevVal.Object(), ts, base) - } - - kind, _ := kindVal.ToString() - switch kind { - case "vertex": 
- if len(stringArgs) == 0 { - it = ts.GetNodesAllIterator() - } else { - fixed := ts.MakeFixed() - for _, name := range stringArgs { - fixed.AddValue(ts.GetIdFor(name)) - } - it = fixed - } - case "tag": - it = subIt - for _, tag := range stringArgs { - it.AddTag(tag) - } - case "save": - all := ts.GetNodesAllIterator() - if len(stringArgs) > 2 || len(stringArgs) == 0 { - return graph.NewNullIterator() - } - if len(stringArgs) == 2 { - all.AddTag(stringArgs[1]) - } else { - all.AddTag(stringArgs[0]) - } - predFixed := ts.MakeFixed() - predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, "o")) - hasa := graph.NewHasaIterator(ts, subAnd, "s") - and := graph.NewAndIterator() - and.AddSubIterator(hasa) - and.AddSubIterator(subIt) - it = and - case "saver": - all := ts.GetNodesAllIterator() - if len(stringArgs) > 2 || len(stringArgs) == 0 { - return graph.NewNullIterator() - } - if len(stringArgs) == 2 { - all.AddTag(stringArgs[1]) - } else { - all.AddTag(stringArgs[0]) - } - predFixed := ts.MakeFixed() - predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, all, "s")) - hasa := graph.NewHasaIterator(ts, subAnd, "o") - and := graph.NewAndIterator() - and.AddSubIterator(hasa) - and.AddSubIterator(subIt) - it = and - case "has": - fixed := ts.MakeFixed() - if len(stringArgs) < 2 { - return graph.NewNullIterator() - } - for _, name := range stringArgs[1:] { - fixed.AddValue(ts.GetIdFor(name)) - } - predFixed := ts.MakeFixed() - predFixed.AddValue(ts.GetIdFor(stringArgs[0])) - subAnd := graph.NewAndIterator() - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, predFixed, "p")) - subAnd.AddSubIterator(graph.NewLinksToIterator(ts, fixed, "o")) - hasa := 
graph.NewHasaIterator(ts, subAnd, "s") - and := graph.NewAndIterator() - and.AddSubIterator(hasa) - and.AddSubIterator(subIt) - it = and - case "morphism": - it = base - case "and": - arg, _ := obj.Get("_gremlin_values") - firstArg, _ := arg.Object().Get("0") - if !isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() - } - argIt := buildIteratorTree(firstArg.Object(), ts) - - and := graph.NewAndIterator() - and.AddSubIterator(subIt) - and.AddSubIterator(argIt) - it = and - case "back": - arg, _ := obj.Get("_gremlin_back_chain") - argIt := buildIteratorTree(arg.Object(), ts) - and := graph.NewAndIterator() - and.AddSubIterator(subIt) - and.AddSubIterator(argIt) - it = and - case "is": - fixed := ts.MakeFixed() - for _, name := range stringArgs { - fixed.AddValue(ts.GetIdFor(name)) - } - and := graph.NewAndIterator() - and.AddSubIterator(fixed) - and.AddSubIterator(subIt) - it = and - case "or": - arg, _ := obj.Get("_gremlin_values") - firstArg, _ := arg.Object().Get("0") - if !isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() - } - argIt := buildIteratorTree(firstArg.Object(), ts) - - or := graph.NewOrIterator() - or.AddSubIterator(subIt) - or.AddSubIterator(argIt) - it = or - case "both": - // Hardly the most efficient pattern, but the most general. - // Worth looking into an Optimize() optimization here. 
- clone := subIt.Clone() - it1 := buildInOutIterator(obj, ts, subIt, false) - it2 := buildInOutIterator(obj, ts, clone, true) - - or := graph.NewOrIterator() - or.AddSubIterator(it1) - or.AddSubIterator(it2) - it = or - case "out": - it = buildInOutIterator(obj, ts, subIt, false) - case "follow": - // Follow a morphism - arg, _ := obj.Get("_gremlin_values") - firstArg, _ := arg.Object().Get("0") - if isVertexChain(firstArg.Object()) { - return graph.NewNullIterator() - } - it = buildIteratorTreeHelper(firstArg.Object(), ts, subIt) - case "followr": - // Follow a morphism - arg, _ := obj.Get("_gremlin_followr") - if isVertexChain(arg.Object()) { - return graph.NewNullIterator() - } - it = buildIteratorTreeHelper(arg.Object(), ts, subIt) - case "in": - it = buildInOutIterator(obj, ts, subIt, true) - } - return it -} diff --git a/src/gremlin/gremlin-env.go b/src/gremlin/gremlin-env.go deleted file mode 100644 index 4e7f332..0000000 --- a/src/gremlin/gremlin-env.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gremlin - -// Builds a new Gremlin environment pointing at a session. 
- -import ( - "github.com/barakmich/glog" - "github.com/robertkrimen/otto" -) - -func BuildGremlinEnv(ses *GremlinSession) *otto.Otto { - env := otto.New() - setupGremlin(env, ses) - return env -} - -func concatStringArgs(call otto.FunctionCall) *[]interface{} { - outStrings := make([]interface{}, 0) - for _, arg := range call.ArgumentList { - if arg.IsString() { - outStrings = append(outStrings, arg.String()) - } - if arg.IsObject() && arg.Class() == "Array" { - obj, _ := arg.Export() - for _, x := range obj.([]interface{}) { - outStrings = append(outStrings, x.(string)) - } - } - } - return &outStrings -} - -func isVertexChain(obj *otto.Object) bool { - val, _ := obj.Get("_gremlin_type") - if x, _ := val.ToString(); x == "vertex" { - return true - } - val, _ = obj.Get("_gremlin_prev") - if val.IsObject() { - return isVertexChain(val.Object()) - } - return false -} - -func setupGremlin(env *otto.Otto, ses *GremlinSession) { - graph, _ := env.Object("graph = {}") - graph.Set("Vertex", func(call otto.FunctionCall) otto.Value { - call.Otto.Run("var out = {}") - out, err := call.Otto.Object("out") - if err != nil { - glog.Error(err.Error()) - return otto.TrueValue() - } - out.Set("_gremlin_type", "vertex") - outStrings := concatStringArgs(call) - if len(*outStrings) > 0 { - out.Set("string_args", *outStrings) - } - embedTraversals(env, ses, out) - embedFinals(env, ses, out) - return out.Value() - }) - - graph.Set("Morphism", func(call otto.FunctionCall) otto.Value { - call.Otto.Run("var out = {}") - out, _ := call.Otto.Object("out") - out.Set("_gremlin_type", "morphism") - embedTraversals(env, ses, out) - return out.Value() - }) - graph.Set("Emit", func(call otto.FunctionCall) otto.Value { - value := call.Argument(0) - if value.IsDefined() { - ses.SendResult(&GremlinResult{metaresult: false, err: "", val: &value, actualResults: nil}) - } - return otto.NullValue() - }) - env.Run("graph.V = graph.Vertex") - env.Run("graph.M = graph.Morphism") - env.Run("g = graph") - -} 
diff --git a/src/gremlin/gremlin-finals.go b/src/gremlin/gremlin-finals.go deleted file mode 100644 index 08bdd6d..0000000 --- a/src/gremlin/gremlin-finals.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gremlin - -import ( - "github.com/barakmich/glog" - "github.com/robertkrimen/otto" - "graph" -) - -const GremlinTopResultTag = "id" - -func embedFinals(env *otto.Otto, ses *GremlinSession, obj *otto.Object) { - obj.Set("All", allFunc(env, ses, obj)) - obj.Set("GetLimit", limitFunc(env, ses, obj)) - obj.Set("ToArray", toArrayFunc(env, ses, obj, false)) - obj.Set("ToValue", toValueFunc(env, ses, obj, false)) - obj.Set("TagArray", toArrayFunc(env, ses, obj, true)) - obj.Set("TagValue", toValueFunc(env, ses, obj, true)) - obj.Set("Map", mapFunc(env, ses, obj)) - obj.Set("ForEach", mapFunc(env, ses, obj)) -} - -func allFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - it := buildIteratorTree(obj, ses.ts) - it.AddTag(GremlinTopResultTag) - ses.limit = -1 - ses.count = 0 - runIteratorOnSession(it, ses) - return otto.NullValue() - } -} - -func limitFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - if len(call.ArgumentList) > 0 { - limitVal, _ := 
call.Argument(0).ToInteger() - it := buildIteratorTree(obj, ses.ts) - it.AddTag(GremlinTopResultTag) - ses.limit = int(limitVal) - ses.count = 0 - runIteratorOnSession(it, ses) - } - return otto.NullValue() - } -} - -func toArrayFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object, withTags bool) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - it := buildIteratorTree(obj, ses.ts) - it.AddTag(GremlinTopResultTag) - limit := -1 - if len(call.ArgumentList) > 0 { - limitParsed, _ := call.Argument(0).ToInteger() - limit = int(limitParsed) - } - var val otto.Value - var err error - if !withTags { - array := runIteratorToArrayNoTags(it, ses, limit) - val, err = call.Otto.ToValue(array) - } else { - array := runIteratorToArray(it, ses, limit) - val, err = call.Otto.ToValue(array) - } - - if err != nil { - glog.Error(err) - return otto.NullValue() - } - return val - } -} - -func toValueFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object, withTags bool) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - it := buildIteratorTree(obj, ses.ts) - it.AddTag(GremlinTopResultTag) - limit := 1 - var val otto.Value - var err error - if !withTags { - array := runIteratorToArrayNoTags(it, ses, limit) - if len(array) < 1 { - return otto.NullValue() - } - val, err = call.Otto.ToValue(array[0]) - } else { - array := runIteratorToArray(it, ses, limit) - if len(array) < 1 { - return otto.NullValue() - } - val, err = call.Otto.ToValue(array[0]) - } - if err != nil { - glog.Error(err) - return otto.NullValue() - } else { - return val - } - - } -} - -func mapFunc(env *otto.Otto, ses *GremlinSession, obj *otto.Object) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - it := buildIteratorTree(obj, ses.ts) - it.AddTag(GremlinTopResultTag) - limit := -1 - if len(call.ArgumentList) == 0 { - return otto.NullValue() - } - callback := call.Argument(len(call.ArgumentList) 
- 1) - if len(call.ArgumentList) > 1 { - limitParsed, _ := call.Argument(0).ToInteger() - limit = int(limitParsed) - } - runIteratorWithCallback(it, ses, callback, call, limit) - return otto.NullValue() - } -} - -func tagsToValueMap(m map[string]graph.TSVal, ses *GremlinSession) map[string]string { - outputMap := make(map[string]string) - for k, v := range m { - outputMap[k] = ses.ts.GetNameFor(v) - } - return outputMap -} - -func runIteratorToArray(it graph.Iterator, ses *GremlinSession, limit int) []map[string]string { - output := make([]map[string]string, 0) - count := 0 - it, _ = it.Optimize() - for { - if ses.doHalt { - return nil - } - _, ok := it.Next() - if !ok { - break - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - output = append(output, tagsToValueMap(tags, ses)) - count++ - if limit >= 0 && count >= limit { - break - } - for it.NextResult() == true { - if ses.doHalt { - return nil - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - output = append(output, tagsToValueMap(tags, ses)) - count++ - if limit >= 0 && count >= limit { - break - } - } - } - it.Close() - return output -} - -func runIteratorToArrayNoTags(it graph.Iterator, ses *GremlinSession, limit int) []string { - output := make([]string, 0) - count := 0 - it, _ = it.Optimize() - for { - if ses.doHalt { - return nil - } - val, ok := it.Next() - if !ok { - break - } - output = append(output, ses.ts.GetNameFor(val)) - count++ - if limit >= 0 && count >= limit { - break - } - } - it.Close() - return output -} - -func runIteratorWithCallback(it graph.Iterator, ses *GremlinSession, callback otto.Value, this otto.FunctionCall, limit int) { - count := 0 - it, _ = it.Optimize() - for { - if ses.doHalt { - return - } - _, ok := it.Next() - if !ok { - break - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - val, _ := this.Otto.ToValue(tagsToValueMap(tags, ses)) - val, _ = callback.Call(this.This, val) - count++ - if limit >= 0 && count >= limit { - 
break - } - for it.NextResult() == true { - if ses.doHalt { - return - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - val, _ := this.Otto.ToValue(tagsToValueMap(tags, ses)) - val, _ = callback.Call(this.This, val) - count++ - if limit >= 0 && count >= limit { - break - } - } - } - it.Close() -} - -func runIteratorOnSession(it graph.Iterator, ses *GremlinSession) { - if ses.lookingForQueryShape { - graph.OutputQueryShapeForIterator(it, ses.ts, &(ses.queryShape)) - return - } - it, _ = it.Optimize() - glog.V(2).Infoln(it.DebugString(0)) - for { - // TODO(barakmich): Better halting. - if ses.doHalt { - return - } - _, ok := it.Next() - if !ok { - break - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - cont := ses.SendResult(&GremlinResult{metaresult: false, err: "", val: nil, actualResults: &tags}) - if !cont { - break - } - for it.NextResult() == true { - if ses.doHalt { - return - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - cont := ses.SendResult(&GremlinResult{metaresult: false, err: "", val: nil, actualResults: &tags}) - if !cont { - break - } - } - } - it.Close() -} diff --git a/src/gremlin/gremlin-functional_test.go b/src/gremlin/gremlin-functional_test.go deleted file mode 100644 index f516965..0000000 --- a/src/gremlin/gremlin-functional_test.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package gremlin - -import ( - . "github.com/smartystreets/goconvey/convey" - "graph_memstore" - "sort" - "testing" -) - -// +---+ +---+ -// | A |------- ->| F |<-- -// +---+ \------>+---+-/ +---+ \--+---+ -// ------>|#B#| | | E | -// +---+-------/ >+---+ | +---+ -// | C | / v -// +---+ -/ +---+ -// ---- +---+/ |#G#| -// \-->|#D#|------------->+---+ -// +---+ -// - -func buildTripleStore() *GremlinSession { - ts := graph_memstore.MakeTestingMemstore() - return NewGremlinSession(ts, -1) -} - -func shouldBeUnordered(actual interface{}, expected ...interface{}) string { - if len(expected) != 1 { - return "Only one list supported" - } - actualStr := actual.([]string) - expectedStr := expected[0].([]string) - sort.Strings(actualStr) - sort.Strings(expectedStr) - return ShouldResemble(actualStr, expectedStr) -} - -func runQueryGetTag(query string, tag string) ([]string, int) { - js := buildTripleStore() - output := make([]string, 0) - c := make(chan interface{}, 5) - js.ExecInput(query, c, -1) - count := 0 - for result := range c { - count++ - data := result.(*GremlinResult) - if data.val == nil { - val := (*data.actualResults)[tag] - if val != nil { - output = append(output, js.ts.GetNameFor(val)) - } - } - } - return output, count -} - -func ConveyQuery(doc string, query string, expected []string) { - ConveyQueryTag(doc, query, GremlinTopResultTag, expected) -} - -func ConveyQueryTag(doc string, query string, tag string, expected []string) { - Convey(doc, func() { - actual, _ := runQueryGetTag(query, tag) - So(actual, shouldBeUnordered, expected) - }) -} - -func TestGremlin(t *testing.T) { - Convey("With a default memtriplestore", t, func() { - - ConveyQuery("Can get a single vertex", - `g.V("A").All()`, - []string{"A"}) - - ConveyQuery("Can use .Out()", - `g.V("A").Out("follows").All()`, - []string{"B"}) - - ConveyQuery("Can use .In()", - `g.V("B").In("follows").All()`, - []string{"A", "C", "D"}) - - ConveyQuery("Can use .Both()", - `g.V("F").Both("follows").All()`, 
- []string{"B", "G", "E"}) - - ConveyQuery("Can use .Tag()-.Is()-.Back()", - `g.V("B").In("follows").Tag("foo").Out("status").Is("cool").Back("foo").All()`, - []string{"D"}) - - ConveyQuery("Can separate .Tag()-.Is()-.Back()", - ` - x = g.V("C").Out("follows").Tag("foo").Out("status").Is("cool").Back("foo") - x.In("follows").Is("D").Back("foo").All() - `, - []string{"B"}) - - Convey("Can do multiple .Back()s", func() { - query := ` - g.V("E").Out("follows").As("f").Out("follows").Out("status").Is("cool").Back("f").In("follows").In("follows").As("acd").Out("status").Is("cool").Back("f").All() - ` - expected := []string{"D"} - actual, _ := runQueryGetTag(query, "acd") - So(actual, shouldBeUnordered, expected) - }) - - }) -} - -func TestGremlinMorphism(t *testing.T) { - Convey("With a default memtriplestore", t, func() { - - ConveyQuery("Simple morphism works", - ` - grandfollows = g.M().Out("follows").Out("follows") - g.V("C").Follow(grandfollows).All() - `, - []string{"G", "F", "B"}) - - ConveyQuery("Reverse morphism works", - ` - grandfollows = g.M().Out("follows").Out("follows") - g.V("F").FollowR(grandfollows).All() - `, []string{"A", "C", "D"}) - - }) -} - -func TestGremlinIntersection(t *testing.T) { - Convey("With a default memtriplestore", t, func() { - ConveyQuery("Simple intersection", - ` - function follows(x) { return g.V(x).Out("follows") } - - follows("D").And(follows("C")).All() - `, []string{"B"}) - - ConveyQuery("Simple Morphism Intersection", - ` - grandfollows = g.M().Out("follows").Out("follows") - function gfollows(x) { return g.V(x).Follow(grandfollows) } - - gfollows("A").And(gfollows("C")).All() - `, []string{"F"}) - - ConveyQuery("Double Morphism Intersection", - ` - grandfollows = g.M().Out("follows").Out("follows") - function gfollows(x) { return g.V(x).Follow(grandfollows) } - - gfollows("E").And(gfollows("C")).And(gfollows("B")).All() - `, []string{"G"}) - - ConveyQuery("Reverse Intersection", - ` - grandfollows = 
g.M().Out("follows").Out("follows") - - g.V("G").FollowR(grandfollows).Intersect(g.V("F").FollowR(grandfollows)).All() - `, []string{"C"}) - - ConveyQuery("Standard sort of morphism intersection, continue follow", - ` - gfollowers = g.M().In("follows").In("follows") - function cool(x) { return g.V(x).As("a").Out("status").Is("cool").Back("a") } - cool("G").Follow(gfollowers).Intersect(cool("B").Follow(gfollowers)).All() - `, []string{"C"}) - - }) -} - -func TestGremlinHas(t *testing.T) { - Convey("With a default memtriplestore", t, func() { - ConveyQuery("Test a simple Has", - `g.V().Has("status", "cool").All()`, - []string{"G", "D", "B"}) - - ConveyQuery("Test a double Has", - `g.V().Has("status", "cool").Has("follows", "F").All()`, - []string{"B"}) - - }) -} - -func TestGremlinTag(t *testing.T) { - Convey("With a default memtriplestore", t, func() { - ConveyQueryTag("Test a simple save", - `g.V().Save("status", "somecool").All()`, - "somecool", - []string{"cool", "cool", "cool"}) - - ConveyQueryTag("Test a simple saveR", - `g.V("cool").SaveR("status", "who").All()`, - "who", - []string{"G", "D", "B"}) - - ConveyQueryTag("Test an out save", - `g.V("D").Out(null, "pred").All()`, - "pred", - []string{"follows", "follows", "status"}) - - ConveyQueryTag("Test a tag list", - `g.V("D").Out(null, ["pred", "foo", "bar"]).All()`, - "foo", - []string{"follows", "follows", "status"}) - - ConveyQuery("Test a pred list", - `g.V("D").Out(["follows", "status"]).All()`, - []string{"B", "G", "cool"}) - - ConveyQuery("Test a predicate path", - `g.V("D").Out(g.V("follows"), "pred").All()`, - []string{"B", "G"}) - }) -} diff --git a/src/gremlin/gremlin-session.go b/src/gremlin/gremlin-session.go deleted file mode 100644 index 25b3fbb..0000000 --- a/src/gremlin/gremlin-session.go +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gremlin - -import ( - "errors" - "fmt" - "github.com/robertkrimen/otto" - "graph" - "sort" - "time" -) - -type GremlinSession struct { - ts graph.TripleStore - currentChannel chan interface{} - env *otto.Otto - debug bool - limit int - count int - dataOutput []interface{} - lookingForQueryShape bool - queryShape map[string]interface{} - err error - script *otto.Script - doHalt bool - timeoutSec time.Duration - emptyEnv *otto.Otto -} - -func NewGremlinSession(inputTripleStore graph.TripleStore, timeoutSec int, persist bool) *GremlinSession { - var g GremlinSession - g.ts = inputTripleStore - g.env = BuildGremlinEnv(&g) - g.limit = -1 - g.count = 0 - g.lookingForQueryShape = false - if persist { - g.emptyEnv = g.env - } - if timeoutSec < 0 { - g.timeoutSec = time.Duration(-1) - } else { - g.timeoutSec = time.Duration(timeoutSec) - } - g.ClearJson() - return &g -} - -type GremlinResult struct { - metaresult bool - err string - val *otto.Value - actualResults *map[string]graph.TSVal -} - -func (g *GremlinSession) ToggleDebug() { - g.debug = !g.debug -} - -func (g *GremlinSession) GetQuery(input string, output_struct chan map[string]interface{}) { - defer close(output_struct) - g.queryShape = make(map[string]interface{}) - g.lookingForQueryShape = true - g.env.Run(input) - output_struct <- g.queryShape - g.queryShape = nil -} - -func (g *GremlinSession) InputParses(input string) (graph.ParseResult, 
error) { - script, err := g.env.Compile("", input) - if err != nil { - return graph.ParseFail, err - } - g.script = script - return graph.Parsed, nil -} - -func (g *GremlinSession) SendResult(result *GremlinResult) bool { - if g.limit >= 0 && g.limit == g.count { - return false - } - if g.doHalt { - return false - } - if g.currentChannel != nil { - g.currentChannel <- result - g.count++ - if g.limit >= 0 && g.limit == g.count { - return false - } else { - return true - } - } - return false -} - -var halt = errors.New("Query Timeout") - -func (g *GremlinSession) runUnsafe(input interface{}) (otto.Value, error) { - g.doHalt = false - defer func() { - if caught := recover(); caught != nil { - if caught == halt { - g.err = halt - return - } - panic(caught) // Something else happened, repanic! - } - }() - - g.env.Interrupt = make(chan func(), 1) // The buffer prevents blocking - - if g.timeoutSec != -1 { - go func() { - time.Sleep(g.timeoutSec * time.Second) // Stop after two seconds - g.doHalt = true - if g.env != nil { - g.env.Interrupt <- func() { - panic(halt) - } - g.env = g.emptyEnv - } - }() - } - - return g.env.Run(input) // Here be dragons (risky code) -} - -func (g *GremlinSession) ExecInput(input string, out chan interface{}, limit int) { - defer close(out) - g.err = nil - g.currentChannel = out - var err error - var value otto.Value - if g.script == nil { - value, err = g.runUnsafe(input) - } else { - value, err = g.runUnsafe(g.script) - } - if err != nil { - out <- &GremlinResult{metaresult: true, - err: err.Error(), - val: &value, - actualResults: nil} - } else { - out <- &GremlinResult{metaresult: true, - err: "", - val: &value, - actualResults: nil} - } - g.currentChannel = nil - g.script = nil - g.env = g.emptyEnv - return -} - -func (s *GremlinSession) ToText(result interface{}) string { - data := result.(*GremlinResult) - if data.metaresult { - if data.err != "" { - return fmt.Sprintln("Error: ", data.err) - } - if data.val != nil { - s, _ := 
data.val.Export() - if data.val.IsObject() { - typeVal, _ := data.val.Object().Get("_gremlin_type") - if !typeVal.IsUndefined() { - s = "[internal Iterator]" - } - } - return fmt.Sprintln("=>", s) - } - return "" - } - var out string - out = fmt.Sprintln("****") - if data.val == nil { - tags := data.actualResults - tagKeys := make([]string, len(*tags)) - i := 0 - for k, _ := range *tags { - tagKeys[i] = k - i++ - } - sort.Strings(tagKeys) - for _, k := range tagKeys { - if k == "$_" { - continue - } - out += fmt.Sprintf("%s : %s\n", k, s.ts.GetNameFor((*tags)[k])) - } - } else { - if data.val.IsObject() { - export, _ := data.val.Export() - mapExport := export.(map[string]string) - for k, v := range mapExport { - out += fmt.Sprintf("%s : %v\n", k, v) - } - } else { - strVersion, _ := data.val.ToString() - out += fmt.Sprintf("%s\n", strVersion) - } - } - return out -} - -// Web stuff -func (ses *GremlinSession) BuildJson(result interface{}) { - data := result.(*GremlinResult) - if !data.metaresult { - if data.val == nil { - obj := make(map[string]string) - tags := data.actualResults - tagKeys := make([]string, len(*tags)) - i := 0 - for k, _ := range *tags { - tagKeys[i] = k - i++ - } - sort.Strings(tagKeys) - for _, k := range tagKeys { - obj[k] = ses.ts.GetNameFor((*tags)[k]) - } - ses.dataOutput = append(ses.dataOutput, obj) - } else { - if data.val.IsObject() { - export, _ := data.val.Export() - ses.dataOutput = append(ses.dataOutput, export) - } else { - strVersion, _ := data.val.ToString() - ses.dataOutput = append(ses.dataOutput, strVersion) - } - } - } - -} - -func (ses *GremlinSession) GetJson() (interface{}, error) { - defer ses.ClearJson() - if ses.err != nil { - return nil, ses.err - } - if ses.doHalt { - return nil, halt - } - return ses.dataOutput, nil -} - -func (ses *GremlinSession) ClearJson() { - ses.dataOutput = nil -} diff --git a/src/gremlin/gremlin-traversals.go b/src/gremlin/gremlin-traversals.go deleted file mode 100644 index c0f4704..0000000 
--- a/src/gremlin/gremlin-traversals.go +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gremlin - -// Adds special traversal functions to JS Gremlin objects. Most of these just build the chain of objects, and won't often need the session. - -import ( - "github.com/barakmich/glog" - "github.com/robertkrimen/otto" -) - -func embedTraversals(env *otto.Otto, ses *GremlinSession, obj *otto.Object) { - obj.Set("In", gremlinFunc("in", obj, env, ses)) - obj.Set("Out", gremlinFunc("out", obj, env, ses)) - obj.Set("Is", gremlinFunc("is", obj, env, ses)) - obj.Set("Both", gremlinFunc("both", obj, env, ses)) - obj.Set("Follow", gremlinFunc("follow", obj, env, ses)) - obj.Set("FollowR", gremlinFollowR("followr", obj, env, ses)) - obj.Set("And", gremlinFunc("and", obj, env, ses)) - obj.Set("Intersect", gremlinFunc("and", obj, env, ses)) - obj.Set("Union", gremlinFunc("or", obj, env, ses)) - obj.Set("Or", gremlinFunc("or", obj, env, ses)) - obj.Set("Back", gremlinBack("back", obj, env, ses)) - obj.Set("Tag", gremlinFunc("tag", obj, env, ses)) - obj.Set("As", gremlinFunc("tag", obj, env, ses)) - obj.Set("Has", gremlinFunc("has", obj, env, ses)) - obj.Set("Save", gremlinFunc("save", obj, env, ses)) - obj.Set("SaveR", gremlinFunc("saver", obj, env, ses)) -} - -func gremlinFunc(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) 
otto.Value { - return func(call otto.FunctionCall) otto.Value { - call.Otto.Run("var out = {}") - out, _ := call.Otto.Object("out") - out.Set("_gremlin_type", kind) - out.Set("_gremlin_values", call.ArgumentList) - out.Set("_gremlin_prev", prevObj) - outStrings := concatStringArgs(call) - if len(*outStrings) > 0 { - out.Set("string_args", *outStrings) - } - embedTraversals(env, ses, out) - if isVertexChain(call.This.Object()) { - embedFinals(env, ses, out) - } - return out.Value() - } -} - -func gremlinBack(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - call.Otto.Run("var out = {}") - out, _ := call.Otto.Object("out") - out.Set("_gremlin_type", kind) - out.Set("_gremlin_values", call.ArgumentList) - outStrings := concatStringArgs(call) - if len(*outStrings) > 0 { - out.Set("string_args", *outStrings) - } - var otherChain *otto.Object - var thisObj *otto.Object - if len(*outStrings) != 0 { - otherChain, thisObj = reverseGremlinChainTo(call.Otto, prevObj, (*outStrings)[0].(string)) - } else { - otherChain, thisObj = reverseGremlinChainTo(call.Otto, prevObj, "") - } - out.Set("_gremlin_prev", thisObj) - out.Set("_gremlin_back_chain", otherChain) - embedTraversals(env, ses, out) - if isVertexChain(call.This.Object()) { - embedFinals(env, ses, out) - } - return out.Value() - - } -} - -func gremlinFollowR(kind string, prevObj *otto.Object, env *otto.Otto, ses *GremlinSession) func(otto.FunctionCall) otto.Value { - return func(call otto.FunctionCall) otto.Value { - call.Otto.Run("var out = {}") - out, _ := call.Otto.Object("out") - out.Set("_gremlin_type", kind) - out.Set("_gremlin_values", call.ArgumentList) - outStrings := concatStringArgs(call) - if len(*outStrings) > 0 { - out.Set("string_args", *outStrings) - } - if len(call.ArgumentList) == 0 { - return prevObj.Value() - } - arg := call.Argument(0) - if isVertexChain(arg.Object()) { - return 
prevObj.Value() - } - newChain, _ := reverseGremlinChainTo(call.Otto, arg.Object(), "") - out.Set("_gremlin_prev", prevObj) - out.Set("_gremlin_followr", newChain) - embedTraversals(env, ses, out) - if isVertexChain(call.This.Object()) { - embedFinals(env, ses, out) - } - return out.Value() - - } -} - -func reverseGremlinChainTo(env *otto.Otto, prevObj *otto.Object, tag string) (*otto.Object, *otto.Object) { - env.Run("var _base_object = {}") - base, err := env.Object("_base_object") - if err != nil { - glog.Error(err) - return otto.NullValue().Object(), otto.NullValue().Object() - } - if isVertexChain(prevObj) { - base.Set("_gremlin_type", "vertex") - } else { - base.Set("_gremlin_type", "morphism") - } - return reverseGremlinChainHelper(env, prevObj, base, tag) -} - -func reverseGremlinChainHelper(env *otto.Otto, chain *otto.Object, newBase *otto.Object, tag string) (*otto.Object, *otto.Object) { - kindVal, _ := chain.Get("_gremlin_type") - kind, _ := kindVal.ToString() - - if tag != "" { - if kind == "tag" { - tags := getStringArgs(chain) - for _, t := range tags { - if t == tag { - return newBase, chain - } - } - } - } - - if kind == "morphism" || kind == "vertex" { - return newBase, chain - } - var newKind string - switch kind { - case "in": - newKind = "out" - case "out": - newKind = "in" - default: - newKind = kind - } - prev, _ := chain.Get("_gremlin_prev") - env.Run("var out = {}") - out, _ := env.Object("out") - out.Set("_gremlin_type", newKind) - values, _ := chain.Get("_gremlin_values") - out.Set("_gremlin_values", values) - back, _ := chain.Get("_gremlin_back_chain") - out.Set("_gremlin_back_chain", back) - out.Set("_gremlin_prev", newBase) - strings, _ := chain.Get("string_args") - out.Set("string_args", strings) - return reverseGremlinChainHelper(env, prev.Object(), out, tag) -} - -func debugChain(obj *otto.Object) bool { - val, _ := obj.Get("_gremlin_type") - x, _ := val.ToString() - glog.V(2).Infoln(x) - val, _ = obj.Get("_gremlin_prev") - if 
val.IsObject() { - return debugChain(val.Object()) - } - return false -} diff --git a/src/gremlin/gremlin_test.nt b/src/gremlin/gremlin_test.nt deleted file mode 100644 index 3febca3..0000000 --- a/src/gremlin/gremlin_test.nt +++ /dev/null @@ -1,11 +0,0 @@ -A follows B . -C follows B . -C follows D . -D follows B . -B follows F . -F follows G . -D follows G . -E follows F . -B status cool . -D status cool . -G status cool . diff --git a/src/mql/mql-build-iterator.go b/src/mql/mql-build-iterator.go deleted file mode 100644 index 8f7d8d1..0000000 --- a/src/mql/mql-build-iterator.go +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mql - -import ( - "errors" - "fmt" - "graph" - "log" - "math" - "strings" -) - -func (m *MqlQuery) buildFixed(s string) graph.Iterator { - f := m.ses.ts.MakeFixed() - f.AddValue(m.ses.ts.GetIdFor(s)) - return f -} - -func (m *MqlQuery) buildResultIterator(path MqlPath) graph.Iterator { - all := m.ses.ts.GetNodesAllIterator() - all.AddTag(string(path)) - return graph.NewOptionalIterator(all) -} - -func (m *MqlQuery) BuildIteratorTree(query interface{}) { - m.isRepeated = make(map[MqlPath]bool) - m.queryStructure = make(map[MqlPath]map[string]interface{}) - m.queryResult = make(map[MqlResultPath]map[string]interface{}) - m.queryResult[""] = make(map[string]interface{}) - - m.it, m.err = m.buildIteratorTreeInternal(query, NewMqlPath()) - if m.err != nil { - m.isError = true - } -} - -func (m *MqlQuery) buildIteratorTreeInternal(query interface{}, path MqlPath) (graph.Iterator, error) { - var it graph.Iterator - var err error - err = nil - switch t := query.(type) { - case bool: - // for JSON booleans - // Treat the bool as a string and call it a day. - // Things which are really bool-like are special cases and will be dealt with separately. - if t { - it = m.buildFixed("true") - } - it = m.buildFixed("false") - case float64: - // for JSON numbers - // Damn you, Javascript, and your lack of integer values. - if math.Floor(t) == t { - // Treat it like an integer. 
- it = m.buildFixed(fmt.Sprintf("%d", t)) - } else { - it = m.buildFixed(fmt.Sprintf("%f", t)) - } - case string: - // for JSON strings - it = m.buildFixed(t) - case []interface{}: - // for JSON arrays - m.isRepeated[path] = true - if len(t) == 0 { - it = m.buildResultIterator(path) - } else if len(t) == 1 { - it, err = m.buildIteratorTreeInternal(t[0], path) - } else { - err = errors.New(fmt.Sprintf("Multiple fields at location root%s", path.DisplayString())) - } - case map[string]interface{}: - // for JSON objects - it, err = m.buildIteratorTreeMapInternal(t, path) - case nil: - it = m.buildResultIterator(path) - default: - log.Fatal("Unknown JSON type?", query) - } - if err != nil { - return nil, err - } - it.AddTag(string(path)) - return it, nil -} - -func (m *MqlQuery) buildIteratorTreeMapInternal(query map[string]interface{}, path MqlPath) (graph.Iterator, error) { - it := graph.NewAndIterator() - it.AddSubIterator(m.ses.ts.GetNodesAllIterator()) - var err error - err = nil - outputStructure := make(map[string]interface{}) - for key, subquery := range query { - outputStructure[key] = nil - reverse := false - pred := key - if strings.HasPrefix(pred, "@") { - i := strings.Index(pred, ":") - if i != -1 { - pred = pred[(i + 1):] - } - } - if strings.HasPrefix(pred, "!") { - reverse = true - pred = strings.TrimPrefix(pred, "!") - } - - // Other special constructs here - var subit graph.Iterator - if key == "id" { - subit, err = m.buildIteratorTreeInternal(subquery, path.Follow(key)) - if err != nil { - return nil, err - } - it.AddSubIterator(subit) - } else { - subit, err = m.buildIteratorTreeInternal(subquery, path.Follow(key)) - if err != nil { - return nil, err - } - subAnd := graph.NewAndIterator() - predFixed := m.ses.ts.MakeFixed() - predFixed.AddValue(m.ses.ts.GetIdFor(pred)) - subAnd.AddSubIterator(graph.NewLinksToIterator(m.ses.ts, predFixed, "p")) - if reverse { - lto := graph.NewLinksToIterator(m.ses.ts, subit, "s") - subAnd.AddSubIterator(lto) - hasa 
:= graph.NewHasaIterator(m.ses.ts, subAnd, "o") - it.AddSubIterator(hasa) - } else { - lto := graph.NewLinksToIterator(m.ses.ts, subit, "o") - subAnd.AddSubIterator(lto) - hasa := graph.NewHasaIterator(m.ses.ts, subAnd, "s") - it.AddSubIterator(hasa) - } - } - } - if err != nil { - return nil, err - } - m.queryStructure[path] = outputStructure - return it, nil -} - -type MqlResultPathSlice []MqlResultPath - -func (sl MqlResultPathSlice) Len() int { - return len(sl) -} - -func (sl MqlResultPathSlice) Less(i, j int) bool { - iLen := len(strings.Split(string(sl[i]), "\x30")) - jLen := len(strings.Split(string(sl[j]), "\x30")) - if iLen < jLen { - return true - } - if iLen == jLen { - if len(string(sl[i])) < len(string(sl[j])) { - return true - } - } - return false -} - -func (sl MqlResultPathSlice) Swap(i, j int) { - sl[i], sl[j] = sl[j], sl[i] -} diff --git a/src/mql/mql-fill.go b/src/mql/mql-fill.go deleted file mode 100644 index 72fb2b4..0000000 --- a/src/mql/mql-fill.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mql - -import ( - "graph" - "sort" -) - -func (m *MqlQuery) treeifyResult(tags map[string]graph.TSVal) map[MqlResultPath]string { - // Transform the map into something a little more interesting. 
- results := make(map[MqlPath]string) - for k, v := range tags { - results[MqlPath(k)] = m.ses.ts.GetNameFor(v) - } - resultPaths := make(map[MqlResultPath]string) - for k, v := range results { - resultPaths[k.ToResultPathFromMap(results)] = v - } - - var paths MqlResultPathSlice - - for path, _ := range resultPaths { - paths = append(paths, path) - } - - sort.Sort(paths) - - // Build Structure - for _, path := range paths { - currentPath := path.getPath() - value := resultPaths[path] - namePath := path.AppendValue(value) - if _, ok := m.queryResult[namePath]; !ok { - targetPath, key := path.splitLastPath() - if path == "" { - targetPath, key = "", value - if _, ok := m.queryResult[""][value]; !ok { - m.resultOrder = append(m.resultOrder, value) - } - } - if _, ok := m.queryStructure[currentPath]; ok { - // If there's substructure, then copy that in. - newStruct := m.copyPathStructure(currentPath) - if m.isRepeated[currentPath] && currentPath != "" { - switch t := m.queryResult[targetPath][key].(type) { - case nil: - x := make([]interface{}, 0) - x = append(x, newStruct) - m.queryResult[targetPath][key] = x - m.queryResult[namePath] = newStruct - case []interface{}: - m.queryResult[targetPath][key] = append(t, newStruct) - m.queryResult[namePath] = newStruct - } - - } else { - m.queryResult[namePath] = newStruct - m.queryResult[targetPath][key] = newStruct - } - } - } - } - - // Fill values - for _, path := range paths { - currentPath := path.getPath() - value := resultPaths[path] - namePath := path.AppendValue(value) - if _, ok := m.queryStructure[currentPath]; ok { - // We're dealing with ids. - if _, ok := m.queryResult[namePath]["id"]; ok { - m.queryResult[namePath]["id"] = value - } - } else { - // Just a value. 
- targetPath, key := path.splitLastPath() - if m.isRepeated[currentPath] { - switch t := m.queryResult[targetPath][key].(type) { - case nil: - x := make([]interface{}, 0) - x = append(x, value) - m.queryResult[targetPath][key] = x - case []interface{}: - m.queryResult[targetPath][key] = append(t, value) - } - - } else { - m.queryResult[targetPath][key] = value - } - } - } - - return resultPaths -} - -func (m *MqlQuery) buildResults() { - for _, v := range m.resultOrder { - m.results = append(m.results, m.queryResult[""][v]) - } -} diff --git a/src/mql/mql-functional_test.go b/src/mql/mql-functional_test.go deleted file mode 100644 index 396e75c..0000000 --- a/src/mql/mql-functional_test.go +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mql - -import ( - "encoding/json" - . 
"github.com/smartystreets/goconvey/convey" - "graph_memstore" - "testing" -) - -// +---+ +---+ -// | A |------- ->| F |<-- -// +---+ \------>+---+-/ +---+ \--+---+ -// ------>|#B#| | | E | -// +---+-------/ >+---+ | +---+ -// | C | / v -// +---+ -/ +---+ -// ---- +---+/ |#G#| -// \-->|#D#|------------->+---+ -// +---+ -// - -func buildTripleStore() *MqlSession { - ts := graph_memstore.MakeTestingMemstore() - return NewMqlSession(ts) -} - -func compareJsonInterfaces(actual interface{}, expected interface{}, path MqlPath, t *testing.T) { - isError := false - switch ex := expected.(type) { - case bool: - switch ac := actual.(type) { - case bool: - if ac != ex { - isError = true - } - default: - t.Log("Mismatched type") - isError = true - } - case float64: - switch ac := actual.(type) { - case float64: - if ac != ex { - isError = true - } - default: - t.Log("Mismatched type") - isError = true - } - case string: - switch ac := actual.(type) { - case string: - if ac != ex { - isError = true - } - default: - isError = true - } - case []interface{}: - switch ac := actual.(type) { - case []interface{}: - if len(ac) != len(ex) { - t.Log("Different lengths") - isError = true - } else { - for i, elem := range ex { - compareJsonInterfaces(ac[i], elem, path.Follow(string(i)), t) - } - } - default: - t.Log("Mismatched type") - isError = true - } - case map[string]interface{}: - switch ac := actual.(type) { - case map[string]interface{}: - for k, v := range ex { - actual_value, ok := ac[k] - if !ok { - t.Log("Key", k, "not in actual output.") - isError = true - } else { - compareJsonInterfaces(actual_value, v, path.Follow(string(k)), t) - } - } - default: - t.Log("Mismatched type") - isError = true - } - case nil: - switch ac := actual.(type) { - case nil: - if ac != ex { - isError = true - } - default: - t.Log("Mismatched type") - isError = true - } - default: - t.Error("Unknown JSON type?", expected) - } - - if isError { - actual_bytes, _ := json.MarshalIndent(actual, "", " ") - 
expected_bytes, _ := json.MarshalIndent(expected, "", " ") - t.Error(path.DisplayString(), ":\n", string(actual_bytes), "\nexpected", string(expected_bytes)) - } -} - -func runAndTestQuery(query string, expected string, t *testing.T) { - ses := buildTripleStore() - c := make(chan interface{}, 5) - go ses.ExecInput(query, c, -1) - for result := range c { - ses.BuildJson(result) - } - actual_struct, _ := ses.GetJson() - var expected_struct interface{} - json.Unmarshal([]byte(expected), &expected_struct) - compareJsonInterfaces(actual_struct, expected_struct, NewMqlPath(), t) - ses.ClearJson() -} - -func TestGetAllIds(t *testing.T) { - Convey("Should get all IDs in the database", t, func() { - query := ` - [{"id": null}] - ` - expected := ` - [ - {"id": "A"}, - {"id": "follows"}, - {"id": "B"}, - {"id": "C"}, - {"id": "D"}, - {"id": "F"}, - {"id": "G"}, - {"id": "E"}, - {"id": "status"}, - {"id": "cool"}, - {"id": "status_graph"} - ] - ` - runAndTestQuery(query, expected, t) - }) -} - -func TestGetCool(t *testing.T) { - query := ` - [{"id": null, "status": "cool"}] - ` - expected := ` - [ - {"id": "B", "status": "cool"}, - {"id": "D", "status": "cool"}, - {"id": "G", "status": "cool"} - ] - ` - runAndTestQuery(query, expected, t) -} - -func TestGetFollowsList(t *testing.T) { - query := ` - [{"id": "C", "follows": []}] - ` - expected := ` - [{ - "id": "C", - "follows": [ - "B", "D" - ] - }] - ` - runAndTestQuery(query, expected, t) -} - -func TestGetFollowsStruct(t *testing.T) { - query := ` - [{"id": null, "follows": {"id": null, "status": "cool"}}] - ` - expected := ` - [ - {"id": "A", "follows": {"id": "B", "status": "cool"}}, - {"id": "C", "follows": {"id": "D", "status": "cool"}}, - {"id": "D", "follows": {"id": "G", "status": "cool"}}, - {"id": "F", "follows": {"id": "G", "status": "cool"}} - ] - ` - runAndTestQuery(query, expected, t) -} - -func TestGetFollowsReverseStructList(t *testing.T) { - query := ` - [{"id": null, "!follows": [{"id": null, "status" : 
"cool"}]}] - ` - expected := ` - [ - {"id": "F", "!follows": [{"id": "B", "status": "cool"}]}, - {"id": "B", "!follows": [{"id": "D", "status": "cool"}]}, - {"id": "G", "!follows": [{"id": "D", "status": "cool"}]} - ] - ` - runAndTestQuery(query, expected, t) -} - -func TestGetRevFollowsList(t *testing.T) { - query := ` - [{"id": "F", "!follows": []}] - ` - expected := ` - [{ - "id": "F", - "!follows": [ - "B", "E" - ] - }] - ` - runAndTestQuery(query, expected, t) -} - -func TestCoFollows(t *testing.T) { - query := ` - [{"id": null, "@A:follows": "B", "@B:follows": "D"}] - ` - expected := ` - [{ - "id": "C", - "@A:follows": "B", - "@B:follows": "D" - }] - ` - runAndTestQuery(query, expected, t) -} - -func TestRevCoFollows(t *testing.T) { - query := ` - [{"id": null, "!follows": {"id": "C"}, "@a:!follows": "D"}] - ` - expected := ` - [{ - "id": "B", - "!follows": {"id": "C"}, - "@a:!follows": "D" - }] - ` - runAndTestQuery(query, expected, t) -} diff --git a/src/mql/mql-query.go b/src/mql/mql-query.go deleted file mode 100644 index a56ad5f..0000000 --- a/src/mql/mql-query.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mql - -import ( - "fmt" - "graph" - "strings" -) - -type MqlPath string -type MqlResultPath string - -type MqlQuery struct { - ses *MqlSession - it graph.Iterator - isRepeated map[MqlPath]bool - queryStructure map[MqlPath]map[string]interface{} - queryResult map[MqlResultPath]map[string]interface{} - results []interface{} - resultOrder []string - isError bool - err error -} - -func (mqlQuery *MqlQuery) copyPathStructure(path MqlPath) map[string]interface{} { - output := make(map[string]interface{}) - for k, v := range mqlQuery.queryStructure[path] { - output[k] = v - } - return output -} - -func NewMqlPath() MqlPath { - return "" -} -func (p MqlPath) Follow(s string) MqlPath { - return MqlPath(fmt.Sprintf("%s\x1E%s", p, s)) -} - -func (p MqlPath) DisplayString() string { - return strings.Replace(string(p), "\x1E", ".", -1) -} - -func NewMqlResultPath() MqlResultPath { - return "" -} - -func (p MqlResultPath) FollowPath(followPiece string, value string) MqlResultPath { - if string(p) == "" { - return MqlResultPath(fmt.Sprintf("%s\x1E%s", value, followPiece)) - } - return MqlResultPath(fmt.Sprintf("%s\x1E%s\x1E%s", p, value, followPiece)) -} - -func (p MqlResultPath) getPath() MqlPath { - out := NewMqlPath() - pathPieces := strings.Split(string(p), "\x1E") - for len(pathPieces) > 1 { - a := pathPieces[1] - pathPieces = pathPieces[2:] - out = out.Follow(a) - } - return out -} - -func (p MqlResultPath) splitLastPath() (MqlResultPath, string) { - pathPieces := strings.Split(string(p), "\x1E") - return MqlResultPath(strings.Join(pathPieces[:len(pathPieces)-1], "\x1E")), pathPieces[len(pathPieces)-1] -} - -func (p MqlResultPath) AppendValue(value string) MqlResultPath { - if string(p) == "" { - return MqlResultPath(value) - } - return MqlResultPath(fmt.Sprintf("%s\x1E%s", p, value)) -} - -func (p MqlPath) ToResultPathFromMap(resultMap map[MqlPath]string) MqlResultPath { - output := NewMqlResultPath() - pathPieces := strings.Split(string(p), "\x1E")[1:] - 
pathSoFar := NewMqlPath() - for _, piece := range pathPieces { - output = output.FollowPath(piece, resultMap[pathSoFar]) - pathSoFar = pathSoFar.Follow(piece) - } - return output -} - -func NewMqlQuery(ses *MqlSession) *MqlQuery { - var q MqlQuery - q.ses = ses - q.results = make([]interface{}, 0) - q.resultOrder = make([]string, 0) - q.err = nil - q.isError = false - return &q -} diff --git a/src/mql/mql-session.go b/src/mql/mql-session.go deleted file mode 100644 index e08e75e..0000000 --- a/src/mql/mql-session.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mql - -import ( - "encoding/json" - "fmt" - "github.com/barakmich/glog" - "graph" - "sort" -) - -type MqlSession struct { - ts graph.TripleStore - currentQuery *MqlQuery - debug bool -} - -func NewMqlSession(ts graph.TripleStore) *MqlSession { - var m MqlSession - m.ts = ts - return &m -} - -func (m *MqlSession) ToggleDebug() { - m.debug = !m.debug -} - -func (m *MqlSession) GetQuery(input string, output_struct chan map[string]interface{}) { - defer close(output_struct) - var mqlQuery interface{} - err := json.Unmarshal([]byte(input), &mqlQuery) - if err != nil { - return - } - m.currentQuery = NewMqlQuery(m) - m.currentQuery.BuildIteratorTree(mqlQuery) - output := make(map[string]interface{}) - graph.OutputQueryShapeForIterator(m.currentQuery.it, m.ts, &output) - nodes := output["nodes"].([]graph.Node) - new_nodes := make([]graph.Node, 0) - for _, n := range nodes { - n.Tags = nil - new_nodes = append(new_nodes, n) - } - output["nodes"] = new_nodes - output_struct <- output -} - -func (m *MqlSession) InputParses(input string) (graph.ParseResult, error) { - var x interface{} - err := json.Unmarshal([]byte(input), &x) - if err != nil { - return graph.ParseFail, err - } - return graph.Parsed, nil -} - -func (m *MqlSession) ExecInput(input string, c chan interface{}, limit int) { - defer close(c) - var mqlQuery interface{} - err := json.Unmarshal([]byte(input), &mqlQuery) - if err != nil { - return - } - m.currentQuery = NewMqlQuery(m) - m.currentQuery.BuildIteratorTree(mqlQuery) - if m.currentQuery.isError { - return - } - it, _ := m.currentQuery.it.Optimize() - if glog.V(2) { - glog.V(2).Infoln(it.DebugString(0)) - } - for { - _, ok := it.Next() - if !ok { - break - } - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - c <- &tags - for it.NextResult() == true { - tags := make(map[string]graph.TSVal) - it.TagResults(&tags) - c <- &tags - } - } -} - -func (m *MqlSession) ToText(result interface{}) string { - tags := 
*(result.(*map[string]graph.TSVal)) - out := fmt.Sprintln("****") - tagKeys := make([]string, len(tags)) - m.currentQuery.treeifyResult(tags) - m.currentQuery.buildResults() - r, _ := json.MarshalIndent(m.currentQuery.results, "", " ") - fmt.Println(string(r)) - i := 0 - for k, _ := range tags { - tagKeys[i] = string(k) - i++ - } - sort.Strings(tagKeys) - for _, k := range tagKeys { - if k == "$_" { - continue - } - out += fmt.Sprintf("%s : %s\n", k, m.ts.GetNameFor(tags[k])) - } - return out -} - -func (m *MqlSession) BuildJson(result interface{}) { - m.currentQuery.treeifyResult(*(result.(*map[string]graph.TSVal))) -} - -func (m *MqlSession) GetJson() (interface{}, error) { - m.currentQuery.buildResults() - if m.currentQuery.isError { - return nil, m.currentQuery.err - } else { - return m.currentQuery.results, nil - } -} - -func (m *MqlSession) ClearJson() { - // Since we create a new MqlQuery underneath every query, clearing isn't necessary. - return -} diff --git a/src/nquads/nquads.go b/src/nquads/nquads.go deleted file mode 100644 index 63ec89c..0000000 --- a/src/nquads/nquads.go +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package nquads - -import ( - "bufio" - "github.com/barakmich/glog" - "graph" - "io" - "strings" -) - -func isWhitespace(s uint8) bool { - return (s == '\t' || s == '\r' || s == ' ') -} -func ParseLineToTriple(str string) *graph.Triple { - // Skip leading whitespace. - str = skipWhitespace(str) - // Check for a comment - if str != "" && str[0] == '#' { - return nil - } - sub, remainder := getTripleComponent(str) - if sub == nil { - return nil - } - str = skipWhitespace(remainder) - pred, remainder := getTripleComponent(str) - if pred == nil { - return nil - } - str = skipWhitespace(remainder) - obj, remainder := getTripleComponent(str) - if obj == nil { - return nil - } - str = skipWhitespace(remainder) - prov_ptr, remainder := getTripleComponent(str) - var prov string - if prov_ptr == nil { - prov = "" - } else { - prov = *prov_ptr - } - str = skipWhitespace(remainder) - if str != "" && str[0] == '.' { - return graph.MakeTriple(*sub, *pred, *obj, prov) - } - return nil -} - -func skipWhitespace(str string) string { - i := 0 - for i < len(str) && isWhitespace(str[i]) { - i += 1 - } - return str[i:] -} - -func getTripleComponent(str string) (*string, string) { - if len(str) == 0 { - return nil, str - } - if str[0] == '<' { - return getUriPart(str[1:]) - } else if str[0] == '"' { - return getQuotedPart(str[1:]) - } else if str[0] == '.' { - return nil, str - } else { - // Technically not part of the spec. But we do it anyway for convenience. 
- return getUnquotedPart(str) - } -} - -func getUriPart(str string) (*string, string) { - i := 0 - for i < len(str) && str[i] != '>' { - i += 1 - } - if i == len(str) { - return nil, str - } - part := str[0:i] - return &part, str[i+1:] -} - -func getQuotedPart(str string) (*string, string) { - i := 0 - start := 0 - out := "" - for i < len(str) && str[i] != '"' { - if str[i] == '\\' { - out += str[start:i] - switch str[i+1] { - case '\\': - out += "\\" - case 'r': - out += "\r" - case 'n': - out += "\n" - case 't': - out += "\t" - case '"': - out += "\"" - default: - return nil, str - } - i += 2 - start = i - continue - } - i += 1 - } - if i == len(str) { - return nil, str - } - out += str[start:i] - i += 1 - var remainder string - if strings.HasPrefix(str[i:], "^^<") { - // Ignore type, for now - _, remainder = getUriPart(str[i+3:]) - } else if strings.HasPrefix(str[i:], "@") { - _, remainder = getUnquotedPart(str[i+1:]) - } else { - remainder = str[i:] - } - - return &out, remainder -} - -func getUnquotedPart(str string) (*string, string) { - i := 0 - initStr := str - out := "" - start := 0 - for i < len(str) && !isWhitespace(str[i]) { - if str[i] == '"' { - part, remainder := getQuotedPart(str[i+1:]) - if part == nil { - return part, initStr - } - out += str[start:i] - str = remainder - i = 0 - start = 0 - out += *part - } - i += 1 - } - out += str[start:i] - return &out, str[i:] -} - -func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) { - bf := bufio.NewReader(reader) - - nTriples := 0 - line := "" - for { - l, pre, err := bf.ReadLine() - if err == io.EOF { - break - } - if err != nil { - glog.Fatalln("Something bad happened while reading file " + err.Error()) - } - line += string(l) - if pre { - continue - } - triple := ParseLineToTriple(line) - line = "" - if triple != nil { - nTriples++ - c <- triple - } - } - glog.Infoln("Read", nTriples, "triples") - close(c) -} diff --git a/src/nquads/nquads_test.go b/src/nquads/nquads_test.go deleted file 
mode 100644 index 47e02a5..0000000 --- a/src/nquads/nquads_test.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package nquads - -import ( - . "github.com/smartystreets/goconvey/convey" - "graph" - "testing" -) - -func TestParsingNTriples(t *testing.T) { - Convey("When parsing", t, func() { - Convey("It should not parse invalid triples", func() { - x := ParseLineToTriple("invalid") - So(x, ShouldBeNil) - }) - Convey("It should not parse comments", func() { - x := ParseLineToTriple("# nominally valid triple .") - So(x, ShouldBeNil) - }) - Convey("It should parse simple triples", func() { - x := ParseLineToTriple("this is valid .") - So(x, ShouldNotBeNil) - So(x.Sub, ShouldEqual, "this") - }) - Convey("It should parse quoted triples", func() { - x := ParseLineToTriple("this is \"valid too\" .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "valid too") - So(x.Provenance, ShouldEqual, "") - }) - Convey("It should parse escaped quoted triples", func() { - x := ParseLineToTriple("he said \"\\\"That's all folks\\\"\" .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "\"That's all folks\"") - So(x.Provenance, ShouldEqual, "") - }) - - Convey("It should parse an example real triple", func() { - x := ParseLineToTriple("\":/guid/9202a8c04000641f80000000010c843c\" \"name\" \"George Morris\" .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "George Morris") - 
So(x.Provenance, ShouldEqual, "") - }) - - Convey("It should parse a pathologically spaced triple", func() { - x := ParseLineToTriple("foo is \"\\tA big tough\\r\\nDeal\\\\\" .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "\tA big tough\r\nDeal\\") - So(x.Provenance, ShouldEqual, "") - }) - - Convey("It should parse a simple quad", func() { - x := ParseLineToTriple("this is valid quad .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "valid") - So(x.Provenance, ShouldEqual, "quad") - }) - - Convey("It should parse a quoted quad", func() { - x := ParseLineToTriple("this is valid \"quad thing\" .") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "valid") - So(x.Provenance, ShouldEqual, "quad thing") - }) - - Convey("It should parse crazy escaped quads", func() { - x := ParseLineToTriple("\"\\\"this\" \"\\\"is\" \"\\\"valid\" \"\\\"quad thing\".") - So(x, ShouldNotBeNil) - So(x.Sub, ShouldEqual, "\"this") - So(x.Pred, ShouldEqual, "\"is") - So(x.Obj, ShouldEqual, "\"valid") - So(x.Provenance, ShouldEqual, "\"quad thing") - }) - }) -} - -func TestParsingNTriplesOfficial(t *testing.T) { - Convey("When using some public test cases...", t, func() { - Convey("It should handle some simple cases with comments", func() { - var x *graph.Triple - x = ParseLineToTriple(" . # comment") - So(x, ShouldNotBeNil) - So(x.Sub, ShouldEqual, "http://example/s") - So(x.Pred, ShouldEqual, "http://example/p") - So(x.Obj, ShouldEqual, "http://example/o") - So(x.Provenance, ShouldEqual, "") - x = ParseLineToTriple(" _:o . # comment") - So(x, ShouldNotBeNil) - So(x.Sub, ShouldEqual, "http://example/s") - So(x.Pred, ShouldEqual, "http://example/p") - So(x.Obj, ShouldEqual, "_:o") - So(x.Provenance, ShouldEqual, "") - x = ParseLineToTriple(" \"o\" . # comment") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "o") - So(x.Provenance, ShouldEqual, "") - x = ParseLineToTriple(" \"o\"^^ . 
# comment") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "o") - So(x.Provenance, ShouldEqual, "") - x = ParseLineToTriple(" \"o\"@en . # comment") - So(x, ShouldNotBeNil) - So(x.Obj, ShouldEqual, "o") - So(x.Provenance, ShouldEqual, "") - }) - }) -} - -func BenchmarkParser(b *testing.B) { - for n := 0; n < b.N; n++ { - x := ParseLineToTriple(" \"object of some real\\tlength\"@en . # comment") - if x.Obj != "object of some real\tlength" { - b.Fail() - } - } -}