From f8e28e066ef22e9198534485bb5dded4655b187c Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 1 Aug 2014 07:27:16 +0930 Subject: [PATCH 01/15] Rename NextResult -> NextPath See discussion in #92. --- TODO.md | 2 +- graph/iterator.go | 6 +++--- graph/iterator/all_iterator.go | 2 +- graph/iterator/and_iterator.go | 8 ++++---- graph/iterator/fixed_iterator.go | 2 +- graph/iterator/hasa_iterator.go | 12 ++++++------ graph/iterator/iterator.go | 2 +- graph/iterator/linksto_iterator.go | 4 ++-- graph/iterator/optional_iterator.go | 4 ++-- graph/iterator/or_iterator.go | 6 +++--- graph/iterator/value_comparison_iterator.go | 4 ++-- graph/leveldb/all_iterator.go | 2 +- graph/leveldb/iterator.go | 2 +- graph/memstore/iterator.go | 2 +- graph/memstore/triplestore_test.go | 2 +- graph/mongo/iterator.go | 2 +- graph/result_tree_evaluator.go | 2 +- query/gremlin/finals.go | 6 +++--- query/mql/session.go | 2 +- query/sexp/session.go | 2 +- 20 files changed, 37 insertions(+), 37 deletions(-) diff --git a/TODO.md b/TODO.md index 92c0118..74973c9 100644 --- a/TODO.md +++ b/TODO.md @@ -52,7 +52,7 @@ An important failure of MQL before was that it was never well-specified. Let's n ### New Iterators #### Limit Iterator -The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Contains(), and a limit on NextResult() +The necessary component to make mid-query limit work. Acts as a limit on Next(), a passthrough on Contains(), and a limit on NextPath() ## Medium Term diff --git a/graph/iterator.go b/graph/iterator.go index 972f334..5b3cd8a 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -79,15 +79,15 @@ type Iterator interface { // To get the full results of iteration, do the following: // while (!Next()): // emit result - // while (!NextResult()): + // while (!NextPath()): // emit result // // All of them should set iterator.Last to be the last returned value, to // make results work. // - // NextResult() advances iterators that may have more than one valid result, + // NextPath() advances iterators that may have more than one valid result, // from the bottom up. - NextResult() bool + NextPath() bool // Contains returns whether the value is within the set held by the iterator. Contains(Value) bool diff --git a/graph/iterator/all_iterator.go b/graph/iterator/all_iterator.go index a3e1174..79f4057 100644 --- a/graph/iterator/all_iterator.go +++ b/graph/iterator/all_iterator.go @@ -110,7 +110,7 @@ func (it *Int64) Result() graph.Value { return it.result } -func (it *Int64) NextResult() bool { +func (it *Int64) NextPath() bool { return false } diff --git a/graph/iterator/and_iterator.go b/graph/iterator/and_iterator.go index 654f7c6..3fba7d1 100644 --- a/graph/iterator/and_iterator.go +++ b/graph/iterator/and_iterator.go @@ -236,15 +236,15 @@ func (it *And) Size() (int64, bool) { return val, b } -// An And has no NextResult of its own -- that is, there are no other values +// An And has no NextPath of its own -- that is, there are no other values // which satisfy our previous result that are not the result itself. Our // subiterators might, however, so just pass the call recursively. -func (it *And) NextResult() bool { - if it.primaryIt.NextResult() { +func (it *And) NextPath() bool { + if it.primaryIt.NextPath() { return true } for _, sub := range it.internalIterators { - if sub.NextResult() { + if sub.NextPath() { return true } } diff --git a/graph/iterator/fixed_iterator.go b/graph/iterator/fixed_iterator.go index 7efee2d..4fbcf08 100644 --- a/graph/iterator/fixed_iterator.go +++ b/graph/iterator/fixed_iterator.go @@ -156,7 +156,7 @@ func (it *Fixed) Result() graph.Value { return it.result } -func (it *Fixed) NextResult() bool { +func (it *Fixed) NextPath() bool { return false } diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index 954102a..93305e7 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -27,9 +27,9 @@ package iterator // value to check, it means "Check all predicates that have this value for your // direction against the subiterator." This would imply that there's more than // one possibility for the same Contains()ed value. While we could return the -// number of options, it's simpler to return one, and then call NextResult() +// number of options, it's simpler to return one, and then call NextPath() // enough times to enumerate the options. (In fact, one could argue that the -// raison d'etre for NextResult() is this iterator). +// raison d'etre for NextPath() is this iterator). // // Alternatively, can be seen as the dual of the LinksTo iterator. @@ -175,14 +175,14 @@ func (it *HasA) NextContains() bool { } // Get the next result that matches this branch. -func (it *HasA) NextResult() bool { - // Order here is important. If the subiterator has a NextResult, then we +func (it *HasA) NextPath() bool { + // Order here is important. If the subiterator has a NextPath, then we // need do nothing -- there is a next result, and we shouldn't move forward. // However, we then need to get the next result from our last Contains(). // - // The upshot is, the end of NextResult() bubbles up from the bottom of the + // The upshot is, the end of NextPath() bubbles up from the bottom of the // iterator tree up, and we need to respect that. - if it.primaryIt.NextResult() { + if it.primaryIt.NextPath() { return true } return it.NextContains() diff --git a/graph/iterator/iterator.go b/graph/iterator/iterator.go index 69b26c5..582991f 100644 --- a/graph/iterator/iterator.go +++ b/graph/iterator/iterator.go @@ -95,7 +95,7 @@ func (it *Null) SubIterators() []graph.Iterator { return nil } -func (it *Null) NextResult() bool { +func (it *Null) NextPath() bool { return false } diff --git a/graph/iterator/linksto_iterator.go b/graph/iterator/linksto_iterator.go index a79b34b..8148aa4 100644 --- a/graph/iterator/linksto_iterator.go +++ b/graph/iterator/linksto_iterator.go @@ -183,8 +183,8 @@ func (it *LinksTo) Close() { } // We won't ever have a new result, but our subiterators might. -func (it *LinksTo) NextResult() bool { - return it.primaryIt.NextResult() +func (it *LinksTo) NextPath() bool { + return it.primaryIt.NextPath() } // Register the LinksTo. diff --git a/graph/iterator/optional_iterator.go b/graph/iterator/optional_iterator.go index 646bc7f..0cd7eb5 100644 --- a/graph/iterator/optional_iterator.go +++ b/graph/iterator/optional_iterator.go @@ -88,9 +88,9 @@ func (it *Optional) Result() graph.Value { // An optional iterator only has a next result if, (a) last time we checked // we had any results whatsoever, and (b) there was another subresult in our // optional subbranch. -func (it *Optional) NextResult() bool { +func (it *Optional) NextPath() bool { if it.lastCheck { - return it.subIt.NextResult() + return it.subIt.NextPath() } return false } diff --git a/graph/iterator/or_iterator.go b/graph/iterator/or_iterator.go index b7e765a..b1b8100 100644 --- a/graph/iterator/or_iterator.go +++ b/graph/iterator/or_iterator.go @@ -228,13 +228,13 @@ func (it *Or) Size() (int64, bool) { return val, b } -// An Or has no NextResult of its own -- that is, there are no other values +// An Or has no NextPath of its own -- that is, there are no other values // which satisfy our previous result that are not the result itself. Our // subiterators might, however, so just pass the call recursively. In the case of // shortcircuiting, only allow new results from the currently checked graph.iterator -func (it *Or) NextResult() bool { +func (it *Or) NextPath() bool { if it.currentIterator != -1 { - return it.internalIterators[it.currentIterator].NextResult() + return it.internalIterators[it.currentIterator].NextPath() } return false } diff --git a/graph/iterator/value_comparison_iterator.go b/graph/iterator/value_comparison_iterator.go index 7c2eb58..405d152 100644 --- a/graph/iterator/value_comparison_iterator.go +++ b/graph/iterator/value_comparison_iterator.go @@ -152,9 +152,9 @@ func (it *Comparison) Result() graph.Value { return it.result } -func (it *Comparison) NextResult() bool { +func (it *Comparison) NextPath() bool { for { - hasNext := it.subIt.NextResult() + hasNext := it.subIt.NextPath() if !hasNext { return false } diff --git a/graph/leveldb/all_iterator.go b/graph/leveldb/all_iterator.go index 5346f97..f0504f1 100644 --- a/graph/leveldb/all_iterator.go +++ b/graph/leveldb/all_iterator.go @@ -129,7 +129,7 @@ func (it *AllIterator) Result() graph.Value { return it.result } -func (it *AllIterator) NextResult() bool { +func (it *AllIterator) NextPath() bool { return false } diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index b434dfd..01e2ba8 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -154,7 +154,7 @@ func (it *Iterator) Result() graph.Value { return it.result } -func (it *Iterator) NextResult() bool { +func (it *Iterator) NextPath() bool { return false } diff --git a/graph/memstore/iterator.go b/graph/memstore/iterator.go index 8a7e1ef..a84b069 100644 --- a/graph/memstore/iterator.go +++ b/graph/memstore/iterator.go @@ -111,7 +111,7 @@ func (it *Iterator) Result() graph.Value { return it.result } -func (it *Iterator) NextResult() bool { +func (it *Iterator) NextPath() bool { return false } diff --git a/graph/memstore/triplestore_test.go b/graph/memstore/triplestore_test.go index 44c43b4..80d25f0 100644 --- a/graph/memstore/triplestore_test.go +++ b/graph/memstore/triplestore_test.go @@ -128,7 +128,7 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { ) for { got = append(got, ts.NameOf(all.Result())) - if !outerAnd.NextResult() { + if !outerAnd.NextPath() { break } } diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index f9cea5b..59444b7 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -165,7 +165,7 @@ func (it *Iterator) Result() graph.Value { return it.result } -func (it *Iterator) NextResult() bool { +func (it *Iterator) NextPath() bool { return false } diff --git a/graph/result_tree_evaluator.go b/graph/result_tree_evaluator.go index e2feb33..dfa988a 100644 --- a/graph/result_tree_evaluator.go +++ b/graph/result_tree_evaluator.go @@ -50,7 +50,7 @@ func StringResultTreeEvaluator(it Nexter) string { } out += it.ResultTree().String() out += "\n" - for it.NextResult() == true { + for it.NextPath() == true { out += " " out += it.ResultTree().String() out += "\n" diff --git a/query/gremlin/finals.go b/query/gremlin/finals.go index 022a394..dd9b2f0 100644 --- a/query/gremlin/finals.go +++ b/query/gremlin/finals.go @@ -162,7 +162,7 @@ func runIteratorToArray(it graph.Iterator, ses *Session, limit int) []map[string if limit >= 0 && count >= limit { break } - for it.NextResult() == true { + for it.NextPath() == true { if ses.doHalt { return nil } @@ -220,7 +220,7 @@ func runIteratorWithCallback(it graph.Iterator, ses *Session, callback otto.Valu if limit >= 0 && count >= limit { break } - for it.NextResult() == true { + for it.NextPath() == true { if ses.doHalt { return } @@ -259,7 +259,7 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) { if !cont { break } - for it.NextResult() == true { + for it.NextPath() == true { if ses.doHalt { return } diff --git a/query/mql/session.go b/query/mql/session.go index c272005..3c6d1b1 100644 --- a/query/mql/session.go +++ b/query/mql/session.go @@ -96,7 +96,7 @@ func (s *Session) ExecInput(input string, c chan interface{}, limit int) { tags := make(map[string]graph.Value) it.TagResults(tags) c <- tags - for it.NextResult() == true { + for it.NextPath() == true { tags := make(map[string]graph.Value) it.TagResults(tags) c <- tags diff --git a/query/sexp/session.go b/query/sexp/session.go index c1a227b..2a3671a 100644 --- a/query/sexp/session.go +++ b/query/sexp/session.go @@ -89,7 +89,7 @@ func (s *Session) ExecInput(input string, out chan interface{}, limit int) { if nResults > limit && limit != -1 { break } - for it.NextResult() == true { + for it.NextPath() == true { tags := make(map[string]graph.Value) it.TagResults(tags) out <- &tags From b1a70d99aa9fbbe42bb9cce767e8e9804930525e Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 1 Aug 2014 09:15:02 +0930 Subject: [PATCH 02/15] Simplify Nexter interface This change allows a Nexter to be used in the same manner as a scanner using a for graph.Next(it) {} construction. It is important that graph.Next(it) and any associated it.Result() calls operate on the same iterator. --- graph/iterator.go | 28 +++++++++------- graph/iterator/all_iterator.go | 2 +- graph/iterator/and_iterator.go | 21 +++++------- graph/iterator/and_iterator_test.go | 25 ++++++--------- graph/iterator/fixed_iterator.go | 4 +-- graph/iterator/hasa_iterator.go | 21 ++++++------ graph/iterator/iterator.go | 4 +-- graph/iterator/linksto_iterator.go | 30 ++++++++--------- graph/iterator/linksto_iterator_test.go | 4 +-- graph/iterator/optional_iterator.go | 2 -- graph/iterator/or_iterator.go | 41 ++++++++++++------------ graph/iterator/or_iterator_test.go | 8 ++--- graph/iterator/query_shape.go | 8 ++--- graph/iterator/value_comparison_iterator.go | 17 ++++------ graph/iterator/value_comparison_iterator_test.go | 8 ++--- graph/leveldb/all_iterator.go | 8 ++--- graph/leveldb/iterator.go | 12 +++---- graph/leveldb/leveldb_test.go | 20 ++++-------- graph/leveldb/triplestore_iterator_optimize.go | 6 ++-- graph/memstore/all_iterator.go | 12 +++---- graph/memstore/iterator.go | 2 +- graph/memstore/triplestore.go | 7 ++-- graph/memstore/triplestore_iterator_optimize.go | 6 ++-- graph/memstore/triplestore_test.go | 10 +++--- graph/mongo/iterator.go | 6 ++-- graph/mongo/triplestore_iterator_optimize.go | 6 ++-- graph/result_tree_evaluator.go | 39 ++++++++++------------ query/gremlin/finals.go | 14 +++----- query/mql/session.go | 6 +--- query/sexp/parser_test.go | 18 +++++------ query/sexp/session.go | 6 +--- 31 files changed, 168 insertions(+), 233 deletions(-) diff --git a/graph/iterator.go b/graph/iterator.go index 5b3cd8a..a6b315c 100644 --- a/graph/iterator.go +++ b/graph/iterator.go @@ -77,10 +77,14 @@ type Iterator interface { // the iteration interface. // // To get the full results of iteration, do the following: - // while (!Next()): - // emit result - // while (!NextPath()): - // emit result + // + // for graph.Next(it) { + // val := it.Result() + // ... do things with val. + // for it.NextPath() { + // ... find other paths to iterate + // } + // } // // All of them should set iterator.Last to be the last returned value, to // make results work. @@ -135,22 +139,22 @@ type Iterator interface { } type Nexter interface { - // Next() advances the iterator and returns the next valid result. Returns - // (, true) or (nil, false) - Next() (Value, bool) + // Next advances the iterator to the next value, which will then be available through + // the Result method. It returns false if no further advancement is possible. + Next() bool Iterator } // Next is a convenience function that conditionally calls the Next method // of an Iterator if it is a Nexter. If the Iterator is not a Nexter, Next -// return a nil Value and false. -func Next(it Iterator) (Value, bool) { +// returns false. +func Next(it Iterator) bool { if n, ok := it.(Nexter); ok { return n.Next() } glog.Errorln("Nexting an un-nextable iterator") - return nil, false + return false } // FixedIterator wraps iterators that are modifiable by addition of fixed value sets. @@ -253,7 +257,7 @@ func NextLogIn(it Iterator) { } } -func NextLogOut(it Iterator, val Value, ok bool) (Value, bool) { +func NextLogOut(it Iterator, val Value, ok bool) bool { if glog.V(4) { if ok { glog.V(4).Infof("%s %d NEXT IS %d", strings.ToUpper(it.Type().String()), it.UID(), val) @@ -261,5 +265,5 @@ func NextLogOut(it Iterator, val Value, ok bool) (Value, bool) { glog.V(4).Infof("%s %d NEXT DONE", strings.ToUpper(it.Type().String()), it.UID()) } } - return val, ok + return ok } diff --git a/graph/iterator/all_iterator.go b/graph/iterator/all_iterator.go index 79f4057..80e471c 100644 --- a/graph/iterator/all_iterator.go +++ b/graph/iterator/all_iterator.go @@ -87,7 +87,7 @@ func (it *Int64) DebugString(indent int) string { // Next() on an Int64 all iterator is a simple incrementing counter. // Return the next integer, and mark it as the result. -func (it *Int64) Next() (graph.Value, bool) { +func (it *Int64) Next() bool { graph.NextLogIn(it) if it.at == -1 { return graph.NextLogOut(it, nil, false) diff --git a/graph/iterator/and_iterator.go b/graph/iterator/and_iterator.go index 3fba7d1..7cb7577 100644 --- a/graph/iterator/and_iterator.go +++ b/graph/iterator/and_iterator.go @@ -151,25 +151,20 @@ func (it *And) AddSubIterator(sub graph.Iterator) { it.itCount++ } -// Returns the Next value from the And iterator. Because the And is the -// intersection of its subiterators, it must choose one subiterator to produce a -// candidate, and check this value against the subiterators. A productive choice -// of primary iterator is therefore very important. -func (it *And) Next() (graph.Value, bool) { +// Returns advances the And iterator. Because the And is the intersection of its +// subiterators, it must choose one subiterator to produce a candidate, and check +// this value against the subiterators. A productive choice of primary iterator +// is therefore very important. +func (it *And) Next() bool { graph.NextLogIn(it) - var curr graph.Value - var exists bool - for { - curr, exists = graph.Next(it.primaryIt) - if !exists { - return graph.NextLogOut(it, nil, false) - } + for graph.Next(it.primaryIt) { + curr := it.primaryIt.Result() if it.subItsContain(curr) { it.result = curr return graph.NextLogOut(it, curr, true) } } - panic("unreachable") + return graph.NextLogOut(it, nil, false) } func (it *And) Result() graph.Value { diff --git a/graph/iterator/and_iterator_test.go b/graph/iterator/and_iterator_test.go index 82d290a..3023bfd 100644 --- a/graph/iterator/and_iterator_test.go +++ b/graph/iterator/and_iterator_test.go @@ -36,10 +36,10 @@ func TestTag(t *testing.T) { t.Errorf("Cannot get tag back, got %s", out[0]) } - val, ok := and.Next() - if !ok { + if !and.Next() { t.Errorf("And did not next") } + val := and.Result() if val != 234 { t.Errorf("Unexpected value") } @@ -76,18 +76,15 @@ func TestAndAndFixedIterators(t *testing.T) { t.Error("not accurate") } - val, ok := and.Next() - if val != 3 || ok == false { + if !and.Next() || and.Result() != 3 { t.Error("Incorrect first value") } - val, ok = and.Next() - if val != 4 || ok == false { + if !and.Next() || and.Result() != 4 { t.Error("Incorrect second value") } - val, ok = and.Next() - if ok { + if and.Next() { t.Error("Too many values") } @@ -117,8 +114,7 @@ func TestNonOverlappingFixedIterators(t *testing.T) { t.Error("not accurate") } - _, ok := and.Next() - if ok { + if and.Next() { t.Error("Too many values") } @@ -131,18 +127,15 @@ func TestAllIterators(t *testing.T) { and.AddSubIterator(all2) and.AddSubIterator(all1) - val, ok := and.Next() - if val.(int64) != 4 || ok == false { + if !and.Next() || and.Result() != int64(4) { t.Error("Incorrect first value") } - val, ok = and.Next() - if val.(int64) != 5 || ok == false { + if !and.Next() || and.Result() != int64(5) { t.Error("Incorrect second value") } - val, ok = and.Next() - if ok { + if and.Next() { t.Error("Too many values") } diff --git a/graph/iterator/fixed_iterator.go b/graph/iterator/fixed_iterator.go index 4fbcf08..a2d57b1 100644 --- a/graph/iterator/fixed_iterator.go +++ b/graph/iterator/fixed_iterator.go @@ -135,8 +135,8 @@ func (it *Fixed) Contains(v graph.Value) bool { return graph.ContainsLogOut(it, v, false) } -// Return the next stored value from the iterator. -func (it *Fixed) Next() (graph.Value, bool) { +// Next advances the iterator. +func (it *Fixed) Next() bool { graph.NextLogIn(it) if it.lastIndex == len(it.values) { return graph.NextLogOut(it, nil, false) diff --git a/graph/iterator/hasa_iterator.go b/graph/iterator/hasa_iterator.go index 93305e7..93e224a 100644 --- a/graph/iterator/hasa_iterator.go +++ b/graph/iterator/hasa_iterator.go @@ -158,16 +158,13 @@ func (it *HasA) Contains(val graph.Value) bool { // result iterator (a triple iterator based on the last checked value) and returns true if // another match is made. func (it *HasA) NextContains() bool { - for { - linkVal, ok := graph.Next(it.resultIt) - if !ok { - break - } + for graph.Next(it.resultIt) { + link := it.resultIt.Result() if glog.V(4) { - glog.V(4).Infoln("Quad is", it.ts.Quad(linkVal)) + glog.V(4).Infoln("Quad is", it.ts.Quad(link)) } - if it.primaryIt.Contains(linkVal) { - it.result = it.ts.TripleDirection(linkVal, it.dir) + if it.primaryIt.Contains(link) { + it.result = it.ts.TripleDirection(link, it.dir) return true } } @@ -188,20 +185,20 @@ func (it *HasA) NextPath() bool { return it.NextContains() } -// Get the next result from this iterator. This is simpler than Contains. We have a +// Next advances the iterator. This is simpler than Contains. We have a // subiterator we can get a value from, and we can take that resultant triple, // pull our direction out of it, and return that. -func (it *HasA) Next() (graph.Value, bool) { +func (it *HasA) Next() bool { graph.NextLogIn(it) if it.resultIt != nil { it.resultIt.Close() } it.resultIt = &Null{} - tID, ok := graph.Next(it.primaryIt) - if !ok { + if !graph.Next(it.primaryIt) { return graph.NextLogOut(it, 0, false) } + tID := it.primaryIt.Result() name := it.ts.Quad(tID).Get(it.dir) val := it.ts.ValueOf(name) it.result = val diff --git a/graph/iterator/iterator.go b/graph/iterator/iterator.go index 582991f..67d8a80 100644 --- a/graph/iterator/iterator.go +++ b/graph/iterator/iterator.go @@ -79,8 +79,8 @@ func (it *Null) DebugString(indent int) string { return strings.Repeat(" ", indent) + "(null)" } -func (it *Null) Next() (graph.Value, bool) { - return nil, false +func (it *Null) Next() bool { + return false } func (it *Null) Result() graph.Value { diff --git a/graph/iterator/linksto_iterator.go b/graph/iterator/linksto_iterator.go index 8148aa4..517fc11 100644 --- a/graph/iterator/linksto_iterator.go +++ b/graph/iterator/linksto_iterator.go @@ -153,23 +153,23 @@ func (it *LinksTo) Optimize() (graph.Iterator, bool) { } // Next()ing a LinksTo operates as described above. -func (it *LinksTo) Next() (graph.Value, bool) { +func (it *LinksTo) Next() bool { graph.NextLogIn(it) - val, ok := graph.Next(it.nextIt) - if !ok { - // Subiterator is empty, get another one - candidate, ok := graph.Next(it.primaryIt) - if !ok { - // We're out of nodes in our subiterator, so we're done as well. - return graph.NextLogOut(it, 0, false) - } - it.nextIt.Close() - it.nextIt = it.ts.TripleIterator(it.dir, candidate) - // Recurse -- return the first in the next set. - return it.Next() + if graph.Next(it.nextIt) { + it.result = it.nextIt.Result() + return graph.NextLogOut(it, it.nextIt, true) } - it.result = val - return graph.NextLogOut(it, val, ok) + + // Subiterator is empty, get another one + if !graph.Next(it.primaryIt) { + // We're out of nodes in our subiterator, so we're done as well. + return graph.NextLogOut(it, 0, false) + } + it.nextIt.Close() + it.nextIt = it.ts.TripleIterator(it.dir, it.primaryIt.Result()) + + // Recurse -- return the first in the next set. + return it.Next() } func (it *LinksTo) Result() graph.Value { diff --git a/graph/iterator/linksto_iterator_test.go b/graph/iterator/linksto_iterator_test.go index 797ce6e..b1eb1fa 100644 --- a/graph/iterator/linksto_iterator_test.go +++ b/graph/iterator/linksto_iterator_test.go @@ -33,10 +33,10 @@ func TestLinksTo(t *testing.T) { } fixed.Add(val) lto := NewLinksTo(ts, fixed, quad.Object) - val, ok := lto.Next() - if !ok { + if !lto.Next() { t.Error("At least one triple matches the fixed object") } + val = lto.Result() if val != 2 { t.Errorf("Quad index 2, such as %s, should match %s", ts.Quad(2), ts.Quad(val)) } diff --git a/graph/iterator/optional_iterator.go b/graph/iterator/optional_iterator.go index 0cd7eb5..2432390 100644 --- a/graph/iterator/optional_iterator.go +++ b/graph/iterator/optional_iterator.go @@ -51,8 +51,6 @@ func NewOptional(it graph.Iterator) *Optional { } } -func (it *Optional) CanNext() bool { return false } - func (it *Optional) UID() uint64 { return it.uid } diff --git a/graph/iterator/or_iterator.go b/graph/iterator/or_iterator.go index b1b8100..ddff768 100644 --- a/graph/iterator/or_iterator.go +++ b/graph/iterator/or_iterator.go @@ -141,35 +141,34 @@ func (it *Or) AddSubIterator(sub graph.Iterator) { it.itCount++ } -// Returns the Next value from the Or graph.iterator. Because the Or is the -// union of its subiterators, it must produce from all subiterators -- unless -// it's shortcircuiting, in which case, it's the first one that returns anything. -func (it *Or) Next() (graph.Value, bool) { +// Next advances the Or graph.iterator. Because the Or is the union of its +// subiterators, it must produce from all subiterators -- unless it it +// shortcircuiting, in which case, it is the first one that returns anything. +func (it *Or) Next() bool { graph.NextLogIn(it) - var curr graph.Value - var exists bool - firstTime := false + var first bool for { if it.currentIterator == -1 { it.currentIterator = 0 - firstTime = true + first = true } curIt := it.internalIterators[it.currentIterator] - curr, exists = graph.Next(curIt) - if !exists { - if it.isShortCircuiting && !firstTime { - return graph.NextLogOut(it, nil, false) - } - it.currentIterator++ - if it.currentIterator == it.itCount { - return graph.NextLogOut(it, nil, false) - } - } else { - it.result = curr - return graph.NextLogOut(it, curr, true) + + if graph.Next(curIt) { + it.result = curIt.Result() + return graph.NextLogOut(it, it.result, true) + } + + if it.isShortCircuiting && !first { + break + } + it.currentIterator++ + if it.currentIterator == it.itCount { + break } } - panic("unreachable") + + return graph.NextLogOut(it, nil, false) } func (it *Or) Result() graph.Value { diff --git a/graph/iterator/or_iterator_test.go b/graph/iterator/or_iterator_test.go index bbd5de6..b147d56 100644 --- a/graph/iterator/or_iterator_test.go +++ b/graph/iterator/or_iterator_test.go @@ -23,12 +23,8 @@ import ( func iterated(it graph.Iterator) []int { var res []int - for { - val, ok := graph.Next(it) - if !ok { - break - } - res = append(res, val.(int)) + for graph.Next(it) { + res = append(res, it.Result().(int)) } return res } diff --git a/graph/iterator/query_shape.go b/graph/iterator/query_shape.go index 973f6db..77e10eb 100644 --- a/graph/iterator/query_shape.go +++ b/graph/iterator/query_shape.go @@ -129,12 +129,8 @@ func (qs *queryShape) MakeNode(it graph.Iterator) *Node { } case graph.Fixed: n.IsFixed = true - for { - val, more := graph.Next(it) - if !more { - break - } - n.Values = append(n.Values, qs.ts.NameOf(val)) + for graph.Next(it) { + n.Values = append(n.Values, qs.ts.NameOf(it.Result())) } case graph.HasA: hasa := it.(*HasA) diff --git a/graph/iterator/value_comparison_iterator.go b/graph/iterator/value_comparison_iterator.go index 405d152..91b1985 100644 --- a/graph/iterator/value_comparison_iterator.go +++ b/graph/iterator/value_comparison_iterator.go @@ -127,20 +127,15 @@ func (it *Comparison) Clone() graph.Iterator { return out } -func (it *Comparison) Next() (graph.Value, bool) { - var val graph.Value - var ok bool - for { - val, ok = graph.Next(it.subIt) - if !ok { - return nil, false - } +func (it *Comparison) Next() bool { + for graph.Next(it.subIt) { + val := it.subIt.Result() if it.doComparison(val) { - break + it.result = val + return true } } - it.result = val - return val, ok + return false } // DEPRECATED diff --git a/graph/iterator/value_comparison_iterator_test.go b/graph/iterator/value_comparison_iterator_test.go index e7482e8..a2bb108 100644 --- a/graph/iterator/value_comparison_iterator_test.go +++ b/graph/iterator/value_comparison_iterator_test.go @@ -69,12 +69,8 @@ func TestValueComparison(t *testing.T) { vc := NewComparison(simpleFixedIterator(), test.operator, test.operand, ts) var got []string - for { - val, ok := vc.Next() - if !ok { - break - } - got = append(got, ts.NameOf(val)) + for vc.Next() { + got = append(got, ts.NameOf(vc.Result())) } if !reflect.DeepEqual(got, test.expect) { t.Errorf("Failed to show %s, got:%q expect:%q", test.message, got, test.expect) diff --git a/graph/leveldb/all_iterator.go b/graph/leveldb/all_iterator.go index f0504f1..15c8882 100644 --- a/graph/leveldb/all_iterator.go +++ b/graph/leveldb/all_iterator.go @@ -101,10 +101,10 @@ func (it *AllIterator) Clone() graph.Iterator { return out } -func (it *AllIterator) Next() (graph.Value, bool) { +func (it *AllIterator) Next() bool { if !it.open { it.result = nil - return nil, false + return false } var out []byte out = make([]byte, len(it.iter.Key())) @@ -115,10 +115,10 @@ func (it *AllIterator) Next() (graph.Value, bool) { } if !bytes.HasPrefix(out, it.prefix) { it.Close() - return nil, false + return false } it.result = out - return out, true + return true } func (it *AllIterator) ResultTree() *graph.ResultTree { diff --git a/graph/leveldb/iterator.go b/graph/leveldb/iterator.go index 01e2ba8..13e3ed3 100644 --- a/graph/leveldb/iterator.go +++ b/graph/leveldb/iterator.go @@ -117,19 +117,19 @@ func (it *Iterator) Close() { } } -func (it *Iterator) Next() (graph.Value, bool) { +func (it *Iterator) Next() bool { if it.iter == nil { it.result = nil - return nil, false + return false } if !it.open { it.result = nil - return nil, false + return false } if !it.iter.Valid() { it.result = nil it.Close() - return nil, false + return false } if bytes.HasPrefix(it.iter.Key(), it.nextPrefix) { out := make([]byte, len(it.iter.Key())) @@ -139,11 +139,11 @@ func (it *Iterator) Next() (graph.Value, bool) { if !ok { it.Close() } - return out, true + return true } it.Close() it.result = nil - return nil, false + return false } func (it *Iterator) ResultTree() *graph.ResultTree { diff --git a/graph/leveldb/leveldb_test.go b/graph/leveldb/leveldb_test.go index 0e6772a..c6f3653 100644 --- a/graph/leveldb/leveldb_test.go +++ b/graph/leveldb/leveldb_test.go @@ -45,12 +45,8 @@ func makeTripleSet() []*quad.Quad { func iteratedTriples(qs graph.TripleStore, it graph.Iterator) []*quad.Quad { var res ordered - for { - val, ok := graph.Next(it) - if !ok { - break - } - res = append(res, qs.Quad(val)) + for graph.Next(it) { + res = append(res, qs.Quad(it.Result())) } sort.Sort(res) return res @@ -85,12 +81,8 @@ func (o ordered) Swap(i, j int) { o[i], o[j] = o[j], o[i] } func iteratedNames(qs graph.TripleStore, it graph.Iterator) []string { var res []string - for { - val, ok := graph.Next(it) - if !ok { - break - } - res = append(res, qs.NameOf(val)) + for graph.Next(it) { + res = append(res, qs.NameOf(it.Result())) } sort.Strings(res) return res @@ -266,8 +258,8 @@ func TestIterator(t *testing.T) { it.Reset() it = qs.TriplesAllIterator() - edge, _ := graph.Next(it) - triple := qs.Quad(edge) + graph.Next(it) + triple := qs.Quad(it.Result()) set := makeTripleSet() var ok bool for _, t := range set { diff --git a/graph/leveldb/triplestore_iterator_optimize.go b/graph/leveldb/triplestore_iterator_optimize.go index 9aab0f2..31b7f7d 100644 --- a/graph/leveldb/triplestore_iterator_optimize.go +++ b/graph/leveldb/triplestore_iterator_optimize.go @@ -37,10 +37,10 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo if primary.Type() == graph.Fixed { size, _ := primary.Size() if size == 1 { - val, ok := graph.Next(primary) - if !ok { - panic("Sizes lie") + if !graph.Next(primary) { + panic("unexpected size during optimize") } + val := primary.Result() newIt := ts.TripleIterator(it.Direction(), val) nt := newIt.Tagger() nt.CopyFrom(it) diff --git a/graph/memstore/all_iterator.go b/graph/memstore/all_iterator.go index 658a4a1..658254e 100644 --- a/graph/memstore/all_iterator.go +++ b/graph/memstore/all_iterator.go @@ -36,15 +36,13 @@ func (it *AllIterator) SubIterators() []graph.Iterator { return nil } -func (it *AllIterator) Next() (graph.Value, bool) { - next, out := it.Int64.Next() - if !out { - return next, out +func (it *AllIterator) Next() bool { + if !it.Int64.Next() { + return false } - i64 := next.(int64) - _, ok := it.ts.revIdMap[i64] + _, ok := it.ts.revIdMap[it.Int64.Result().(int64)] if !ok { return it.Next() } - return next, out + return true } diff --git a/graph/memstore/iterator.go b/graph/memstore/iterator.go index a84b069..24dda19 100644 --- a/graph/memstore/iterator.go +++ b/graph/memstore/iterator.go @@ -93,7 +93,7 @@ func (it *Iterator) Clone() graph.Iterator { func (it *Iterator) Close() {} -func (it *Iterator) Next() (graph.Value, bool) { +func (it *Iterator) Next() bool { graph.NextLogIn(it) if it.tree.Max() == nil || it.result == int64(it.tree.Max().(Int64)) { return graph.NextLogOut(it, nil, false) diff --git a/graph/memstore/triplestore.go b/graph/memstore/triplestore.go index 23eb11a..7199fdf 100644 --- a/graph/memstore/triplestore.go +++ b/graph/memstore/triplestore.go @@ -132,11 +132,8 @@ func (ts *TripleStore) tripleExists(t *quad.Quad) (bool, int64) { } it := NewLlrbIterator(smallest_tree, "") - for { - val, ok := it.Next() - if !ok { - break - } + for it.Next() { + val := it.Result() if t.Equals(&ts.triples[val.(int64)]) { return true, val.(int64) } diff --git a/graph/memstore/triplestore_iterator_optimize.go b/graph/memstore/triplestore_iterator_optimize.go index 3dc2c2c..ae5628f 100644 --- a/graph/memstore/triplestore_iterator_optimize.go +++ b/graph/memstore/triplestore_iterator_optimize.go @@ -37,10 +37,10 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo if primary.Type() == graph.Fixed { size, _ := primary.Size() if size == 1 { - val, ok := graph.Next(primary) - if !ok { - panic("Sizes lie") + if !graph.Next(primary) { + panic("unexpected size during optimize") } + val := primary.Result() newIt := ts.TripleIterator(it.Direction(), val) nt := newIt.Tagger() nt.CopyFrom(it) diff --git a/graph/memstore/triplestore_test.go b/graph/memstore/triplestore_test.go index 80d25f0..b790140 100644 --- a/graph/memstore/triplestore_test.go +++ b/graph/memstore/triplestore_test.go @@ -114,10 +114,10 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { outerAnd.AddSubIterator(fixed) outerAnd.AddSubIterator(hasa) - val, ok := outerAnd.Next() - if !ok { + if !outerAnd.Next() { t.Error("Expected one matching subtree") } + val := outerAnd.Result() if ts.NameOf(val) != "C" { t.Errorf("Matching subtree should be %s, got %s", "barak", ts.NameOf(val)) } @@ -138,8 +138,7 @@ func TestIteratorsAndNextResultOrderA(t *testing.T) { t.Errorf("Unexpected result, got:%q expect:%q", got, expect) } - val, ok = outerAnd.Next() - if ok { + if outerAnd.Next() { t.Error("More than one possible top level output?") } } @@ -190,8 +189,7 @@ func TestRemoveTriple(t *testing.T) { hasa := iterator.NewHasA(ts, innerAnd, quad.Object) newIt, _ := hasa.Optimize() - _, ok := graph.Next(newIt) - if ok { + if graph.Next(newIt) { t.Error("E should not have any followers.") } } diff --git a/graph/mongo/iterator.go b/graph/mongo/iterator.go index 59444b7..9e37089 100644 --- a/graph/mongo/iterator.go +++ b/graph/mongo/iterator.go @@ -138,7 +138,7 @@ func (it *Iterator) Clone() graph.Iterator { return m } -func (it *Iterator) Next() (graph.Value, bool) { +func (it *Iterator) Next() bool { var result struct { Id string "_id" //Sub string "Sub" @@ -151,10 +151,10 @@ func (it *Iterator) Next() (graph.Value, bool) { if err != nil { glog.Errorln("Error Nexting Iterator: ", err) } - return nil, false + return false } it.result = result.Id - return result.Id, true + return true } func (it *Iterator) ResultTree() *graph.ResultTree { diff --git a/graph/mongo/triplestore_iterator_optimize.go b/graph/mongo/triplestore_iterator_optimize.go index d2d1a05..99fb25b 100644 --- a/graph/mongo/triplestore_iterator_optimize.go +++ b/graph/mongo/triplestore_iterator_optimize.go @@ -37,10 +37,10 @@ func (ts *TripleStore) optimizeLinksTo(it *iterator.LinksTo) (graph.Iterator, bo if primary.Type() == graph.Fixed { size, _ := primary.Size() if size == 1 { - val, ok := graph.Next(primary) - if !ok { - panic("Sizes lie") + if !graph.Next(primary) { + panic("unexpected size during optimize") } + val := primary.Result() newIt := ts.TripleIterator(it.Direction(), val) nt := newIt.Tagger() nt.CopyFrom(it) diff --git a/graph/result_tree_evaluator.go b/graph/result_tree_evaluator.go index dfa988a..56947d4 100644 --- a/graph/result_tree_evaluator.go +++ b/graph/result_tree_evaluator.go @@ -14,7 +14,10 @@ package graph -import "fmt" +import ( + "bytes" + "fmt" +) type ResultTree struct { result Value @@ -26,14 +29,13 @@ func NewResultTree(result Value) *ResultTree { } func (t *ResultTree) String() string { - base := fmt.Sprintf("(%d", t.result) - if len(t.subtrees) != 0 { - for _, sub := range t.subtrees { - base += fmt.Sprintf(" %s", sub) - } + var buf bytes.Buffer + fmt.Fprintf(&buf, "(%d", t.result) + for _, sub := range t.subtrees { + fmt.Fprintf(&buf, " %s", sub) } - base += ")" - return base + buf.WriteByte(')') + return buf.String() } func (t *ResultTree) AddSubtree(sub *ResultTree) { @@ -41,22 +43,15 @@ func (t *ResultTree) AddSubtree(sub *ResultTree) { } func StringResultTreeEvaluator(it Nexter) string { - ok := true - out := "" - for { - _, ok = it.Next() - if !ok { - break - } - out += it.ResultTree().String() - out += "\n" - for it.NextPath() == true { - out += " " - out += it.ResultTree().String() - out += "\n" + var buf bytes.Buffer + for it.Next() { + fmt.Fprintln(&buf, it.ResultTree()) + for it.NextPath() { + buf.WriteByte(' ') + fmt.Fprintln(&buf, it.ResultTree()) } } - return out + return buf.String() } func PrintResultTreeEvaluator(it Nexter) { diff --git a/query/gremlin/finals.go b/query/gremlin/finals.go index dd9b2f0..12f07e6 100644 --- a/query/gremlin/finals.go +++ b/query/gremlin/finals.go @@ -151,8 +151,7 @@ func runIteratorToArray(it graph.Iterator, ses *Session, limit int) []map[string if ses.doHalt { return nil } - _, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { break } tags := make(map[string]graph.Value) @@ -187,11 +186,10 @@ func runIteratorToArrayNoTags(it graph.Iterator, ses *Session, limit int) []stri if ses.doHalt { return nil } - val, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { break } - output = append(output, ses.ts.NameOf(val)) + output = append(output, ses.ts.NameOf(it.Result())) count++ if limit >= 0 && count >= limit { break @@ -208,8 +206,7 @@ func runIteratorWithCallback(it graph.Iterator, ses *Session, callback otto.Valu if ses.doHalt { return } - _, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { break } tags := make(map[string]graph.Value) @@ -249,8 +246,7 @@ func runIteratorOnSession(it graph.Iterator, ses *Session) { if ses.doHalt { return } - _, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { break } tags := make(map[string]graph.Value) diff --git a/query/mql/session.go b/query/mql/session.go index 3c6d1b1..63aa6ba 100644 --- a/query/mql/session.go +++ b/query/mql/session.go @@ -88,11 +88,7 @@ func (s *Session) ExecInput(input string, c chan interface{}, limit int) { if glog.V(2) { glog.V(2).Infoln(it.DebugString(0)) } - for { - _, ok := graph.Next(it) - if !ok { - break - } + for graph.Next(it) { tags := make(map[string]graph.Value) it.TagResults(tags) c <- tags diff --git a/query/sexp/parser_test.go b/query/sexp/parser_test.go index e7e66bf..d74e546 100644 --- a/query/sexp/parser_test.go +++ b/query/sexp/parser_test.go @@ -67,10 +67,10 @@ func TestMemstoreBackedSexp(t *testing.T) { if it.Type() != test.typ { t.Errorf("Incorrect type for %s, got:%q expect %q", test.message, it.Type(), test.expect) } - got, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { t.Errorf("Failed to %s", test.message) } + got := it.Result() if expect := ts.ValueOf(test.expect); got != expect { t.Errorf("Incorrect result for %s, got:%v expect %v", test.message, got, expect) } @@ -88,10 +88,10 @@ func TestTreeConstraintParse(t *testing.T) { if it.Type() != graph.And { t.Error("Odd iterator tree. Got: %s", it.DebugString(0)) } - out, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { t.Error("Got no results") } + out := it.Result() if out != ts.ValueOf("i") { t.Errorf("Got %d, expected %d", out, ts.ValueOf("i")) } @@ -105,8 +105,7 @@ func TestTreeConstraintTagParse(t *testing.T) { "(:like\n" + "($a (:is :good))))" it := BuildIteratorTreeForQuery(ts, query) - _, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { t.Error("Got no results") } tags := make(map[string]graph.Value) @@ -135,15 +134,14 @@ func TestMultipleConstraintParse(t *testing.T) { if it.Type() != graph.And { t.Error("Odd iterator tree. Got: %s", it.DebugString(0)) } - out, ok := graph.Next(it) - if !ok { + if !graph.Next(it) { t.Error("Got no results") } + out := it.Result() if out != ts.ValueOf("i") { t.Errorf("Got %d, expected %d", out, ts.ValueOf("i")) } - _, ok = graph.Next(it) - if ok { + if graph.Next(it) { t.Error("Too many results") } } diff --git a/query/sexp/session.go b/query/sexp/session.go index 2a3671a..eea974f 100644 --- a/query/sexp/session.go +++ b/query/sexp/session.go @@ -77,11 +77,7 @@ func (s *Session) ExecInput(input string, out chan interface{}, limit int) { fmt.Println(it.DebugString(0)) } nResults := 0 - for { - _, ok := graph.Next(it) - if !ok { - break - } + for graph.Next(it) { tags := make(map[string]graph.Value) it.TagResults(tags) out <- &tags From b74cb142f02bc7a0079cf080de1bd4c112b2acab Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 6 Aug 2014 16:59:46 -0400 Subject: [PATCH 03/15] Key/Keyer --- graph/iterator/materialize_iterator.go | 12 ++++++------ graph/leveldb/triplestore.go | 2 +- graph/triplestore.go | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/graph/iterator/materialize_iterator.go b/graph/iterator/materialize_iterator.go index 46baf0c..f740594 100644 --- a/graph/iterator/materialize_iterator.go +++ b/graph/iterator/materialize_iterator.go @@ -32,8 +32,8 @@ type result struct { tags map[string]graph.Value } -type hasher interface { - Hasher() interface{} +type Keyer interface { + Key() interface{} } type Materialize struct { @@ -205,8 +205,8 @@ func (it *Materialize) Contains(v graph.Value) bool { return it.subIt.Contains(v) } key := v - if h, ok := v.(hasher); ok { - key = h.Hasher() + if h, ok := v.(Keyer); ok { + key = h.Key() } if i, ok := it.containsMap[key]; ok { it.index = i @@ -246,8 +246,8 @@ func (it *Materialize) materializeSet() { break } val := id - if h, ok := id.(hasher); ok { - val = h.Hasher() + if h, ok := id.(Keyer); ok { + val = h.Key() } if _, ok := it.containsMap[val]; !ok { it.containsMap[val] = len(it.values) diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index 2bd1279..1a305d2 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -44,7 +44,7 @@ const ( type Token []byte -func (t Token) Hasher() interface{} { +func (t Token) Key() interface{} { return string(t) } diff --git a/graph/triplestore.go b/graph/triplestore.go index 5a68841..b25ca74 100644 --- a/graph/triplestore.go +++ b/graph/triplestore.go @@ -37,7 +37,7 @@ import ( // pointers to structs, or merely triples, or whatever works best for the // backing store. // -// These must be comparable, or implement a `Hasher() interface{}` function +// These must be comparable, or implement a `Key() interface{}` function // so that they may be stored in maps. type Value interface{} From b52f9726ea6e30528c44ccc049ebb2c78e3362e9 Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Wed, 6 Aug 2014 17:18:12 -0400 Subject: [PATCH 04/15] update readme --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0e616a8..65c0faa 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,13 @@ Its goal is to be a part of the developer's toolbox where [Linked Data](http://l [![Build Status](https://travis-ci.org/google/cayley.png?branch=master)](https://travis-ci.org/google/cayley) ## What's new? -* 2014-07-12: - * Massive cleanup and restructuring is largely done, it should be even easier to add to Cayley. (thanks @kortschak) - * A couple new backends are in progress, namely Postgres and Cassandra -- PRs when they come around. - * Cayley is [now in Homebrew](https://github.com/Homebrew/homebrew/commit/1bd2fb2a61c7101a8c79c05afc90eeb02e9aa240), thanks to @whitlockjc - * Our first client API (for Clojure, thanks to @wjb) -- list is now started on the [Client API wiki page](https://github.com/google/cayley/wiki/Client-APIs) +* 2014-08-06: + * 0.3.1 Binary Release including: + * New Quad Parser (more strictly passing the [W3C spec](http://www.w3.org/TR/n-quads) and test suite) + * Automatic decompression of quad files + * Ruby and a Node.JS [client libraries](https://github.com/google/cayley/wiki/Client-APIs) from the community. + * Benchmarks + * [Large speedups on HEAD](https://github.com/google/cayley/pull/101) (in for the next binary release) ## Features From 191244c40e88ba23b4c556d60b16fe8f5fe236cf Mon Sep 17 00:00:00 2001 From: kortschak Date: Thu, 7 Aug 2014 07:07:45 +0930 Subject: [PATCH 05/15] Sync iteration calls to Nexter interface changes --- graph/iterator/materialize_iterator.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/graph/iterator/materialize_iterator.go b/graph/iterator/materialize_iterator.go index f740594..ac6a1bc 100644 --- a/graph/iterator/materialize_iterator.go +++ b/graph/iterator/materialize_iterator.go @@ -32,6 +32,10 @@ type result struct { tags map[string]graph.Value } +// Keyer provides a method for comparing types that are not otherwise comparable. +// The Key method must return a dynamic type that is comparable according to the +// Go language specification. The returned value must be unique for each receiver +// value. type Keyer interface { Key() interface{} } @@ -179,7 +183,7 @@ func (it *Materialize) Stats() graph.IteratorStats { } } -func (it *Materialize) Next() (graph.Value, bool) { +func (it *Materialize) Next() bool { graph.NextLogIn(it) if !it.hasRun { it.materializeSet() @@ -216,12 +220,12 @@ func (it *Materialize) Contains(v graph.Value) bool { return graph.ContainsLogOut(it, v, false) } -func (it *Materialize) NextResult() bool { +func (it *Materialize) NextPath() bool { if !it.hasRun { it.materializeSet() } if it.aborted { - return it.subIt.NextResult() + return it.subIt.NextPath() } it.subindex++ @@ -235,16 +239,13 @@ func (it *Materialize) NextResult() bool { func (it *Materialize) materializeSet() { i := 0 - for { - id, ok := graph.Next(it.subIt) - if !ok { - break - } - i += 1 + for graph.Next(it.subIt) { + i++ if i > abortMaterializeAt { it.aborted = true break } + id := it.subIt.Result() val := id if h, ok := id.(Keyer); ok { val = h.Key() @@ -257,7 +258,7 @@ func (it *Materialize) materializeSet() { tags := make(map[string]graph.Value) it.subIt.TagResults(tags) it.values[index] = append(it.values[index], result{id: id, tags: tags}) - for it.subIt.NextResult() == true { + for it.subIt.NextPath() { tags := make(map[string]graph.Value) it.subIt.TagResults(tags) it.values[index] = append(it.values[index], result{id: id, tags: tags}) From 7265e1d7a179f584e8c05407c31f95774f421293 Mon Sep 17 00:00:00 2001 From: kortschak Date: Thu, 7 Aug 2014 15:02:30 +0930 Subject: [PATCH 06/15] Use github.com/peterh/liner for REPL lines This gives us history and line conveniences. --- .gitignore | 1 + .travis.yml | 1 + db/repl.go | 169 +++++++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 112 insertions(+), 59 deletions(-) diff --git a/.gitignore b/.gitignore index 17c43e6..206a77a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ main *.test *.peg.go cayley.cfg +.cayley_history diff --git a/.travis.yml b/.travis.yml index 66af39d..bd0558f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ install: - go get github.com/barakmich/glog - go get github.com/julienschmidt/httprouter - go get github.com/petar/GoLLRB/llrb + - go get github.com/peterh/liner - go get github.com/robertkrimen/otto - go get github.com/russross/blackfriday - go get github.com/syndtr/goleveldb/leveldb diff --git a/db/repl.go b/db/repl.go index 730d59d..11ecfea 100644 --- a/db/repl.go +++ b/db/repl.go @@ -15,14 +15,15 @@ package db import ( - "bufio" - "bytes" - "errors" "fmt" "io" "os" + "os/signal" + "strings" "time" + "github.com/peterh/liner" + "github.com/google/cayley/config" "github.com/google/cayley/graph" "github.com/google/cayley/quad/cquads" @@ -62,6 +63,13 @@ func Run(query string, ses query.Session) { } } +const ( + ps1 = "cayley> " + ps2 = "... " + + history = ".cayley_history" +) + func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error { var ses query.Session switch queryLanguage { @@ -74,80 +82,123 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error default: ses = gremlin.NewSession(ts, cfg.Timeout, true) } - buf := bufio.NewReader(os.Stdin) - var line []byte + + term, err := terminal(history) + if os.IsNotExist(err) { + fmt.Printf("creating new history file: %q\n", history) + } + defer persist(term, history) + + var ( + prompt = ps1 + + code string + ) + for { - if len(line) == 0 { - fmt.Print("cayley> ") + if len(code) == 0 { + prompt = ps1 } else { - fmt.Print("... ") + prompt = ps2 } - l, prefix, err := buf.ReadLine() - if err == io.EOF { - if len(line) != 0 { - line = line[:0] - } else { + line, err := term.Prompt(prompt) + if err != nil { + if err == io.EOF { return nil } + return err } - if err != nil { - line = line[:0] - } - if prefix { - return errors.New("line too long") - } - line = append(line, l...) - if len(line) == 0 { - continue - } - line = bytes.TrimSpace(line) + + term.AppendHistory(line) + + line = strings.TrimSpace(line) if len(line) == 0 || line[0] == '#' { - line = line[:0] continue } - if bytes.HasPrefix(line, []byte(":debug")) { - ses.ToggleDebug() - fmt.Println("Debug Toggled") - line = line[:0] - continue - } - if bytes.HasPrefix(line, []byte(":a")) { - var tripleStmt = line[3:] - triple, err := cquads.Parse(string(tripleStmt)) - if !triple.IsValid() { - if err != nil { - fmt.Printf("not a valid triple: %v\n", err) + + if code == "" { + switch { + case strings.HasPrefix(line, ":debug"): + ses.ToggleDebug() + fmt.Println("Debug Toggled") + continue + + case strings.HasPrefix(line, ":a"): + triple, err := cquads.Parse(line[3:]) + if !triple.IsValid() { + if err != nil { + fmt.Printf("not a valid triple: %v\n", err) + } + continue } - line = line[:0] + ts.AddTriple(triple) + continue + + case strings.HasPrefix(line, ":d"): + triple, err := cquads.Parse(line[3:]) + if !triple.IsValid() { + if err != nil { + fmt.Printf("not a valid triple: %v\n", err) + } + continue + } + ts.RemoveTriple(triple) continue } - ts.AddTriple(triple) - line = line[:0] - continue } - if bytes.HasPrefix(line, []byte(":d")) { - var tripleStmt = line[3:] - triple, err := cquads.Parse(string(tripleStmt)) - if !triple.IsValid() { - if err != nil { - fmt.Printf("not a valid triple: %v\n", err) - } - line = line[:0] - continue - } - ts.RemoveTriple(triple) - line = line[:0] - continue - } - result, err := ses.InputParses(string(line)) + + code += line + + result, err := ses.InputParses(code) switch result { case query.Parsed: - Run(string(line), ses) - line = line[:0] + Run(code, ses) + code = "" case query.ParseFail: fmt.Println("Error: ", err) - line = line[:0] + code = "" case query.ParseMore: } } } + +func terminal(path string) (*liner.State, error) { + term := liner.NewLiner() + + go func() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, os.Kill) + <-c + + persist(term, history) + + err := term.Close() + if err != nil { + fmt.Fprintf(os.Stderr, "failed to properly clean up terminal: %v\n", err) + os.Exit(1) + } + + os.Exit(0) + }() + + f, err := os.Open(path) + if err != nil { + return term, err + } + defer f.Close() + _, err = term.ReadHistory(f) + return term, err +} + +func persist(term *liner.State, path string) error { + f, err := os.OpenFile(path, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0666) + if err != nil { + return fmt.Errorf("could not open %q to append history: %v", path, err) + } + defer f.Close() + _, err = term.WriteHistory(f) + if err != nil { + return fmt.Errorf("could not write history to %q: %v", path, err) + } + return nil +} From 2d4c07b56d4b063919c7f72d4322e5dfff2399c1 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 08:36:49 +0930 Subject: [PATCH 07/15] Make db name-literal agnostic Move the persistence characteristic of the store into the graph registry and provide an API hook to get that information. Add error return for init on a non-persistent store. Updates #35. --- cayley.go | 2 +- db/init.go | 9 +++++++++ db/open.go | 3 +-- graph/leveldb/triplestore.go | 2 +- graph/memstore/triplestore.go | 2 +- graph/mongo/triplestore.go | 2 +- graph/triplestore.go | 37 ++++++++++++++++++++++--------------- 7 files changed, 36 insertions(+), 21 deletions(-) diff --git a/cayley.go b/cayley.go index c68d487..b6017ce 100644 --- a/cayley.go +++ b/cayley.go @@ -134,6 +134,6 @@ func main() { flag.Usage() } if err != nil { - glog.Fatalln(err) + glog.Errorln(err) } } diff --git a/db/init.go b/db/init.go index a791a8f..650a854 100644 --- a/db/init.go +++ b/db/init.go @@ -15,11 +15,20 @@ package db import ( + "errors" + "fmt" + "github.com/google/cayley/config" "github.com/google/cayley/graph" ) +var ErrNotPersistent = errors.New("database type is not persistent") + func Init(cfg *config.Config, triplePath string) error { + if !graph.IsPersistent(cfg.DatabaseType) { + return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) + } + err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) if err != nil { return err diff --git a/db/open.go b/db/open.go index 3e1ee24..cef8127 100644 --- a/db/open.go +++ b/db/open.go @@ -28,8 +28,7 @@ func Open(cfg *config.Config) (graph.TripleStore, error) { return nil, err } - // Memstore is not persistent, so it MUST be loaded. - if cfg.DatabaseType == "memstore" { + if !graph.IsPersistent(cfg.DatabaseType) { err = Load(ts, cfg, cfg.DatabasePath) if err != nil { return nil, err diff --git a/graph/leveldb/triplestore.go b/graph/leveldb/triplestore.go index 1a305d2..7efb03f 100644 --- a/graph/leveldb/triplestore.go +++ b/graph/leveldb/triplestore.go @@ -34,7 +34,7 @@ import ( ) func init() { - graph.RegisterTripleStore("leveldb", newTripleStore, createNewLevelDB) + graph.RegisterTripleStore("leveldb", true, newTripleStore, createNewLevelDB) } const ( diff --git a/graph/memstore/triplestore.go b/graph/memstore/triplestore.go index 056998c..3641a60 100644 --- a/graph/memstore/triplestore.go +++ b/graph/memstore/triplestore.go @@ -26,7 +26,7 @@ import ( ) func init() { - graph.RegisterTripleStore("memstore", func(string, graph.Options) (graph.TripleStore, error) { + graph.RegisterTripleStore("memstore", false, func(string, graph.Options) (graph.TripleStore, error) { return newTripleStore(), nil }, nil) } diff --git a/graph/mongo/triplestore.go b/graph/mongo/triplestore.go index 20fea2f..364d195 100644 --- a/graph/mongo/triplestore.go +++ b/graph/mongo/triplestore.go @@ -30,7 +30,7 @@ import ( ) func init() { - graph.RegisterTripleStore("mongo", newTripleStore, createNewMongoGraph) + graph.RegisterTripleStore("mongo", true, newTripleStore, createNewMongoGraph) } // Guarantee we satisfy graph.Bulkloader. diff --git a/graph/triplestore.go b/graph/triplestore.go index b25ca74..9d45dd5 100644 --- a/graph/triplestore.go +++ b/graph/triplestore.go @@ -136,38 +136,45 @@ type BulkLoader interface { type NewStoreFunc func(string, Options) (TripleStore, error) type InitStoreFunc func(string, Options) error -var storeRegistry = make(map[string]NewStoreFunc) -var storeInitRegistry = make(map[string]InitStoreFunc) +type register struct { + newFunc NewStoreFunc + initFunc InitStoreFunc + isPersistent bool +} -func RegisterTripleStore(name string, newFunc NewStoreFunc, initFunc InitStoreFunc) { +var storeRegistry = make(map[string]register) + +func RegisterTripleStore(name string, persists bool, newFunc NewStoreFunc, initFunc InitStoreFunc) { if _, found := storeRegistry[name]; found { panic("already registered TripleStore " + name) } - storeRegistry[name] = newFunc - if initFunc != nil { - storeInitRegistry[name] = initFunc + storeRegistry[name] = register{ + newFunc: newFunc, + initFunc: initFunc, + isPersistent: persists, } } func NewTripleStore(name, dbpath string, opts Options) (TripleStore, error) { - newFunc, hasNew := storeRegistry[name] - if !hasNew { + r, registered := storeRegistry[name] + if !registered { return nil, errors.New("triplestore: name '" + name + "' is not registered") } - return newFunc(dbpath, opts) + return r.newFunc(dbpath, opts) } func InitTripleStore(name, dbpath string, opts Options) error { - initFunc, hasInit := storeInitRegistry[name] - if hasInit { - return initFunc(dbpath, opts) - } - if _, isRegistered := storeRegistry[name]; isRegistered { - return nil + r, registered := storeRegistry[name] + if registered { + return r.initFunc(dbpath, opts) } return errors.New("triplestore: name '" + name + "' is not registered") } +func IsPersistent(name string) bool { + return storeRegistry[name].isPersistent +} + func TripleStores() []string { t := make([]string, 0, len(storeRegistry)) for n := range storeRegistry { From 4c3f5109ebadd3450fb5cd4118bc9fc71046035b Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 09:31:02 +0930 Subject: [PATCH 08/15] Separate db.Open and db.Load Updates #82. --- cayley.go | 29 ++++++++++++++++++++++++----- cayley_test.go | 7 +++++++ db/open.go | 7 ------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/cayley.go b/cayley.go index b6017ce..a2a54dd 100644 --- a/cayley.go +++ b/cayley.go @@ -35,7 +35,7 @@ import ( _ "github.com/google/cayley/graph/mongo" ) -var tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") +var tripleFile = flag.String("triples", "", "Triple file to load for database init.") var cpuprofile = flag.String("prof", "", "Output profiling file.") var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") var configFile = flag.String("config", "", "Path to an explicit configuration file.") @@ -100,35 +100,54 @@ func main() { fmt.Println("Cayley snapshot") } os.Exit(0) + case "init": err = db.Init(cfg, *tripleFile) + case "load": ts, err = db.Open(cfg) if err != nil { break } - err = db.Load(ts, cfg, *tripleFile) + err = db.Load(ts, cfg, cfg.DatabasePath) if err != nil { break } + ts.Close() + case "repl": ts, err = db.Open(cfg) if err != nil { break } - err = db.Repl(ts, *queryLanguage, cfg) - if err != nil { - break + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + break + } } + + err = db.Repl(ts, *queryLanguage, cfg) + ts.Close() + case "http": ts, err = db.Open(cfg) if err != nil { break } + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + break + } + } + http.Serve(ts, cfg) + ts.Close() + default: fmt.Println("No command", cmd) flag.Usage() diff --git a/cayley_test.go b/cayley_test.go index 55ef3c2..e59ec53 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -308,6 +308,13 @@ func prepare(t testing.TB) { if err != nil { t.Fatalf("Failed to open %q: %v", cfg.DatabasePath, err) } + + if !graph.IsPersistent(cfg.DatabaseType) { + err = db.Load(ts, cfg, cfg.DatabasePath) + if err != nil { + t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) + } + } }) } diff --git a/db/open.go b/db/open.go index cef8127..bf1f2c7 100644 --- a/db/open.go +++ b/db/open.go @@ -28,12 +28,5 @@ func Open(cfg *config.Config) (graph.TripleStore, error) { return nil, err } - if !graph.IsPersistent(cfg.DatabaseType) { - err = Load(ts, cfg, cfg.DatabasePath) - if err != nil { - return nil, err - } - } - return ts, nil } From 979a0c4aeecb8a21d046ca635cb49ab02b9006a5 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 10:28:18 +0930 Subject: [PATCH 09/15] Allow optional strict N-Quad parsing This puts more of the logic in cayley, but other approaches require that db knows about quad formats. --- cayley.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++------ cayley_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++- db/init.go | 19 ++--------- db/load.go | 62 ++++------------------------------ db/load_test.go | 81 --------------------------------------------- 5 files changed, 177 insertions(+), 164 deletions(-) delete mode 100644 db/load_test.go diff --git a/cayley.go b/cayley.go index a2a54dd..fa0558e 100644 --- a/cayley.go +++ b/cayley.go @@ -17,8 +17,12 @@ package main import ( + "bytes" + "compress/bzip2" + "compress/gzip" "flag" "fmt" + "io" "os" "runtime" @@ -28,6 +32,9 @@ import ( "github.com/google/cayley/db" "github.com/google/cayley/graph" "github.com/google/cayley/http" + "github.com/google/cayley/quad" + "github.com/google/cayley/quad/cquads" + "github.com/google/cayley/quad/nquads" // Load all supported backends. _ "github.com/google/cayley/graph/leveldb" @@ -35,14 +42,19 @@ import ( _ "github.com/google/cayley/graph/mongo" ) -var tripleFile = flag.String("triples", "", "Triple file to load for database init.") -var cpuprofile = flag.String("prof", "", "Output profiling file.") -var queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") -var configFile = flag.String("config", "", "Path to an explicit configuration file.") +var ( + tripleFile = flag.String("triples", "", "Triple File to load before going to REPL.") + tripleType = flag.String("format", "cquad", `Triple format to use for loading ("cquad" or "nquad").`) + cpuprofile = flag.String("prof", "", "Output profiling file.") + queryLanguage = flag.String("query_lang", "gremlin", "Use this parser as the query language.") + configFile = flag.String("config", "", "Path to an explicit configuration file.") +) // Filled in by `go build ldflags="-X main.VERSION `ver`"`. -var BUILD_DATE string -var VERSION string +var ( + BUILD_DATE string + VERSION string +) func Usage() { fmt.Println("Cayley is a graph store and graph query layer.") @@ -102,14 +114,28 @@ func main() { os.Exit(0) case "init": - err = db.Init(cfg, *tripleFile) + err = db.Init(cfg) + if err != nil { + break + } + if *tripleFile != "" { + ts, err = db.Open(cfg) + if err != nil { + break + } + err = load(ts, cfg, *tripleFile, *tripleType) + if err != nil { + break + } + ts.Close() + } case "load": ts, err = db.Open(cfg) if err != nil { break } - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -122,7 +148,7 @@ func main() { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -138,7 +164,7 @@ func main() { break } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", *tripleType) if err != nil { break } @@ -156,3 +182,58 @@ func main() { glog.Errorln(err) } } + +// TODO(kortschak) Make path a URI to allow pointing to any resource. +func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { + if path == "" { + path = cfg.DatabasePath + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("could not open file %q: %v", path, err) + } + defer f.Close() + + r, err := decompressor(f) + if err != nil { + return err + } + + var dec quad.Unmarshaler + switch typ { + case "cquad": + dec = cquads.NewDecoder(r) + case "nquad": + dec = nquads.NewDecoder(r) + default: + return fmt.Errorf("unknown quad format %q", typ) + } + + return db.Load(ts, cfg, dec) +} + +const ( + gzipMagic = "\x1f\x8b" + b2zipMagic = "BZh" +) + +type readAtReader interface { + io.Reader + io.ReaderAt +} + +func decompressor(r readAtReader) (io.Reader, error) { + var buf [3]byte + _, err := r.ReadAt(buf[:], 0) + if err != nil { + return nil, err + } + switch { + case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: + return gzip.NewReader(r) + case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: + return bzip2.NewReader(r), nil + default: + return r, nil + } +} diff --git a/cayley_test.go b/cayley_test.go index e59ec53..eba0720 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -15,6 +15,9 @@ package main import ( + "bytes" + "compress/bzip2" + "compress/gzip" "sync" "testing" "time" @@ -310,7 +313,7 @@ func prepare(t testing.TB) { } if !graph.IsPersistent(cfg.DatabaseType) { - err = db.Load(ts, cfg, cfg.DatabasePath) + err = load(ts, cfg, "", "cquad") if err != nil { t.Fatalf("Failed to load %q: %v", cfg.DatabasePath, err) } @@ -415,3 +418,76 @@ func BenchmarkKeanuOther(b *testing.B) { func BenchmarkKeanuBullockOther(b *testing.B) { runBench(8, b) } + +var testDecompressor = []struct { + message string + input []byte + expect []byte + err error + readErr error +}{ + { + message: "text input", + input: []byte("cayley data\n"), + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "gzip input", + input: []byte{ + 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, + 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, + }, + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "bzip2 input", + input: []byte{ + 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, + 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, + 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, + 0xa9, 0x7c, 0x78, 0x80, + }, + err: nil, + expect: []byte("cayley data\n"), + readErr: nil, + }, + { + message: "bad gzip input", + input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + err: gzip.ErrHeader, + expect: nil, + readErr: nil, + }, + { + message: "bad bzip2 input", + input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + err: nil, + expect: nil, + readErr: bzip2.StructuralError("invalid compression level"), + }, +} + +func TestDecompressor(t *testing.T) { + for _, test := range testDecompressor { + buf := bytes.NewReader(test.input) + r, err := decompressor(buf) + if err != test.err { + t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) + } + if err != nil { + continue + } + p := make([]byte, len(test.expect)*2) + n, err := r.Read(p) + if err != test.readErr { + t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err) + } + if bytes.Compare(p[:n], test.expect) != 0 { + t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect) + } + } +} diff --git a/db/init.go b/db/init.go index 650a854..0f0887c 100644 --- a/db/init.go +++ b/db/init.go @@ -24,25 +24,10 @@ import ( var ErrNotPersistent = errors.New("database type is not persistent") -func Init(cfg *config.Config, triplePath string) error { +func Init(cfg *config.Config) error { if !graph.IsPersistent(cfg.DatabaseType) { return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) } - err := graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) - if err != nil { - return err - } - if triplePath != "" { - ts, err := Open(cfg) - if err != nil { - return err - } - err = Load(ts, cfg, triplePath) - if err != nil { - return err - } - ts.Close() - } - return err + return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) } diff --git a/db/load.go b/db/load.go index 9a3b069..2f0ab23 100644 --- a/db/load.go +++ b/db/load.go @@ -15,46 +15,24 @@ package db import ( - "bytes" - "compress/bzip2" - "compress/gzip" - "fmt" "io" - "os" - "github.com/barakmich/glog" "github.com/google/cayley/config" "github.com/google/cayley/graph" "github.com/google/cayley/quad" - "github.com/google/cayley/quad/cquads" ) -func Load(ts graph.TripleStore, cfg *config.Config, path string) error { - f, err := os.Open(path) - if err != nil { - return fmt.Errorf("could not open file %q: %v", path, err) - } - defer f.Close() - - r, err := decompressor(f) - if err != nil { - glog.Fatalln(err) - } - - dec := cquads.NewDecoder(r) - +func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { bulker, canBulk := ts.(graph.BulkLoader) if canBulk { - err = bulker.BulkLoad(dec) - if err == nil { + switch err := bulker.BulkLoad(dec); err { + case nil: return nil + case graph.ErrCannotBulkLoad: + // Try individual loading. + default: + return err } - if err == graph.ErrCannotBulkLoad { - err = nil - } - } - if err != nil { - return err } block := make([]quad.Quad, 0, cfg.LoadSize) @@ -76,29 +54,3 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error { return nil } - -const ( - gzipMagic = "\x1f\x8b" - b2zipMagic = "BZh" -) - -type readAtReader interface { - io.Reader - io.ReaderAt -} - -func decompressor(r readAtReader) (io.Reader, error) { - var buf [3]byte - _, err := r.ReadAt(buf[:], 0) - if err != nil { - return nil, err - } - switch { - case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: - return gzip.NewReader(r) - case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: - return bzip2.NewReader(r), nil - default: - return r, nil - } -} diff --git a/db/load_test.go b/db/load_test.go deleted file mode 100644 index 17ed6c5..0000000 --- a/db/load_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package db - -import ( - "bytes" - "compress/bzip2" - "compress/gzip" - "testing" -) - -var testDecompressor = []struct { - message string - input []byte - expect []byte - err error - readErr error -}{ - { - message: "text input", - input: []byte("cayley data\n"), - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "gzip input", - input: []byte{ - 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, - 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, - }, - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "bzip2 input", - input: []byte{ - 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, - 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, - 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, - 0xa9, 0x7c, 0x78, 0x80, - }, - err: nil, - expect: []byte("cayley data\n"), - readErr: nil, - }, - { - message: "bad gzip input", - input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, - err: gzip.ErrHeader, - expect: nil, - readErr: nil, - }, - { - message: "bad bzip2 input", - input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, - err: nil, - expect: nil, - readErr: bzip2.StructuralError("invalid compression level"), - }, -} - -func TestDecompressor(t *testing.T) { - for _, test := range testDecompressor { - buf := bytes.NewReader(test.input) - r, err := decompressor(buf) - if err != test.err { - t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) - } - if err != nil { - continue - } - p := make([]byte, len(test.expect)*2) - n, err := r.Read(p) - if err != test.readErr { - t.Fatalf("Unexpected error for reading %s, got:%v expect:%v", test.message, err, test.err) - } - if bytes.Compare(p[:n], test.expect) != 0 { - t.Errorf("Unexpected read result for %s, got:%q expect:%q", test.message, p[:n], test.expect) - } - } -} From 088e73a163f2024ff76a91d7ab2f92c57eda4533 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 10:32:58 +0930 Subject: [PATCH 10/15] Merge {init,open,load}.go into db.go The functions are too small to justify a single file each. The repl file is left out as semantically unrelated. --- db/db.go | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ db/init.go | 33 -------------------------- db/load.go | 56 ------------------------------------------- db/open.go | 32 ------------------------- 4 files changed, 80 insertions(+), 121 deletions(-) create mode 100644 db/db.go delete mode 100644 db/init.go delete mode 100644 db/load.go delete mode 100644 db/open.go diff --git a/db/db.go b/db/db.go new file mode 100644 index 0000000..8ea30db --- /dev/null +++ b/db/db.go @@ -0,0 +1,80 @@ +// Copyright 2014 The Cayley Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package db + +import ( + "errors" + "fmt" + "io" + + "github.com/barakmich/glog" + + "github.com/google/cayley/config" + "github.com/google/cayley/graph" + "github.com/google/cayley/quad" +) + +var ErrNotPersistent = errors.New("database type is not persistent") + +func Init(cfg *config.Config) error { + if !graph.IsPersistent(cfg.DatabaseType) { + return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) + } + + return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) +} + +func Open(cfg *config.Config) (graph.TripleStore, error) { + glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) + ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) + if err != nil { + return nil, err + } + + return ts, nil +} + +func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { + bulker, canBulk := ts.(graph.BulkLoader) + if canBulk { + switch err := bulker.BulkLoad(dec); err { + case nil: + return nil + case graph.ErrCannotBulkLoad: + // Try individual loading. + default: + return err + } + } + + block := make([]quad.Quad, 0, cfg.LoadSize) + for { + t, err := dec.Unmarshal() + if err != nil { + if err == io.EOF { + break + } + return err + } + block = append(block, t) + if len(block) == cap(block) { + ts.AddTripleSet(block) + block = block[:0] + } + } + ts.AddTripleSet(block) + + return nil +} diff --git a/db/init.go b/db/init.go deleted file mode 100644 index 0f0887c..0000000 --- a/db/init.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "errors" - "fmt" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" -) - -var ErrNotPersistent = errors.New("database type is not persistent") - -func Init(cfg *config.Config) error { - if !graph.IsPersistent(cfg.DatabaseType) { - return fmt.Errorf("ignoring unproductive database initialization request: %v", ErrNotPersistent) - } - - return graph.InitTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) -} diff --git a/db/load.go b/db/load.go deleted file mode 100644 index 2f0ab23..0000000 --- a/db/load.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "io" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" - "github.com/google/cayley/quad" -) - -func Load(ts graph.TripleStore, cfg *config.Config, dec quad.Unmarshaler) error { - bulker, canBulk := ts.(graph.BulkLoader) - if canBulk { - switch err := bulker.BulkLoad(dec); err { - case nil: - return nil - case graph.ErrCannotBulkLoad: - // Try individual loading. - default: - return err - } - } - - block := make([]quad.Quad, 0, cfg.LoadSize) - for { - t, err := dec.Unmarshal() - if err != nil { - if err == io.EOF { - break - } - return err - } - block = append(block, t) - if len(block) == cap(block) { - ts.AddTripleSet(block) - block = block[:0] - } - } - ts.AddTripleSet(block) - - return nil -} diff --git a/db/open.go b/db/open.go deleted file mode 100644 index bf1f2c7..0000000 --- a/db/open.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2014 The Cayley Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package db - -import ( - "github.com/barakmich/glog" - - "github.com/google/cayley/config" - "github.com/google/cayley/graph" -) - -func Open(cfg *config.Config) (graph.TripleStore, error) { - glog.Infof("Opening database %q at %s", cfg.DatabaseType, cfg.DatabasePath) - ts, err := graph.NewTripleStore(cfg.DatabaseType, cfg.DatabasePath, cfg.DatabaseOptions) - if err != nil { - return nil, err - } - - return ts, nil -} From 844927ff1f4a692412031309fa58c04fd9f65208 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 21:27:17 +0930 Subject: [PATCH 11/15] Make decompressor conditional on reader interface --- cayley.go | 13 ++++++------- cayley_test.go | 37 +++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/cayley.go b/cayley.go index fa0558e..0c2efda 100644 --- a/cayley.go +++ b/cayley.go @@ -217,14 +217,13 @@ const ( b2zipMagic = "BZh" ) -type readAtReader interface { - io.Reader - io.ReaderAt -} - -func decompressor(r readAtReader) (io.Reader, error) { +func decompressor(r io.Reader) (io.Reader, error) { + ra, ok := r.(io.ReaderAt) + if !ok { + return r, nil + } var buf [3]byte - _, err := r.ReadAt(buf[:], 0) + _, err := ra.ReadAt(buf[:], 0) if err != nil { return nil, err } diff --git a/cayley_test.go b/cayley_test.go index eba0720..ce382b2 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -18,6 +18,8 @@ import ( "bytes" "compress/bzip2" "compress/gzip" + "io" + "strings" "sync" "testing" "time" @@ -419,62 +421,77 @@ func BenchmarkKeanuBullockOther(b *testing.B) { runBench(8, b) } +// reader is a test helper to filter non-io.Reader methods from the contained io.Reader. +type reader struct { + r io.Reader +} + +func (r reader) Read(p []byte) (int, error) { + return r.r.Read(p) +} + var testDecompressor = []struct { message string - input []byte + input io.Reader expect []byte err error readErr error }{ { message: "text input", - input: []byte("cayley data\n"), + input: strings.NewReader("cayley data\n"), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "gzip input", - input: []byte{ + input: bytes.NewReader([]byte{ 0x1f, 0x8b, 0x08, 0x00, 0x5c, 0xbc, 0xcd, 0x53, 0x00, 0x03, 0x4b, 0x4e, 0xac, 0xcc, 0x49, 0xad, 0x54, 0x48, 0x49, 0x2c, 0x49, 0xe4, 0x02, 0x00, 0x03, 0xe1, 0xfc, 0xc3, 0x0c, 0x00, 0x00, 0x00, - }, + }), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "bzip2 input", - input: []byte{ + input: bytes.NewReader([]byte{ 0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0xb5, 0x4b, 0xe3, 0xc4, 0x00, 0x00, 0x02, 0xd1, 0x80, 0x00, 0x10, 0x40, 0x00, 0x2e, 0x04, 0x04, 0x20, 0x20, 0x00, 0x31, 0x06, 0x4c, 0x41, 0x4c, 0x1e, 0xa7, 0xa9, 0x2a, 0x18, 0x26, 0xb1, 0xc2, 0xee, 0x48, 0xa7, 0x0a, 0x12, 0x16, 0xa9, 0x7c, 0x78, 0x80, - }, + }), err: nil, expect: []byte("cayley data\n"), readErr: nil, }, { message: "bad gzip input", - input: []byte{0x1f, 0x8b, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + input: strings.NewReader("\x1f\x8bcayley data\n"), err: gzip.ErrHeader, expect: nil, readErr: nil, }, { message: "bad bzip2 input", - input: []byte{0x42, 0x5a, 0x68, 'c', 'a', 'y', 'l', 'e', 'y', ' ', 'd', 'a', 't', 'a', '\n'}, + input: strings.NewReader("\x42\x5a\x68cayley data\n"), err: nil, expect: nil, readErr: bzip2.StructuralError("invalid compression level"), }, + { + message: "gzip input without ReadAt", + input: reader{strings.NewReader("\x1f\x8bcayley data\n")}, + err: nil, + expect: []byte("\x1f\x8bcayley data\n"), + readErr: nil, + }, } func TestDecompressor(t *testing.T) { for _, test := range testDecompressor { - buf := bytes.NewReader(test.input) - r, err := decompressor(buf) + r, err := decompressor(test.input) if err != test.err { t.Fatalf("Unexpected error for %s, got:%v expect:%v", test.message, err, test.err) } From 4844ef3e58796be9ddb311ee98a0ae09b8464285 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 22:19:06 +0930 Subject: [PATCH 12/15] Add support for remote data sources Now you can, for example: cayley http --dbpath="https://github.com/google/cayley/blob/master/30kmoviedata.nq.gz?raw=true" --- cayley.go | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/cayley.go b/cayley.go index 0c2efda..3d7a157 100644 --- a/cayley.go +++ b/cayley.go @@ -17,13 +17,17 @@ package main import ( + "bufio" "bytes" "compress/bzip2" "compress/gzip" "flag" "fmt" "io" + client "net/http" + "net/url" "os" + "path/filepath" "runtime" "github.com/barakmich/glog" @@ -183,18 +187,35 @@ func main() { } } -// TODO(kortschak) Make path a URI to allow pointing to any resource. func load(ts graph.TripleStore, cfg *config.Config, path, typ string) error { + var r io.Reader + if path == "" { path = cfg.DatabasePath } - f, err := os.Open(path) - if err != nil { - return fmt.Errorf("could not open file %q: %v", path, err) + u, err := url.Parse(path) + if err != nil || u.Scheme == "file" || u.Scheme == "" { + // Don't alter relative URL path or non-URL path parameter. + if u.Scheme != "" && err == nil { + // Recovery heuristic for mistyping "file://path/to/file". + path = filepath.Join(u.Host, u.Path) + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("could not open file %q: %v", path, err) + } + defer f.Close() + r = f + } else { + res, err := client.Get(path) + if err != nil { + return fmt.Errorf("could not get resource <%s>: %v", u, err) + } + defer res.Body.Close() + r = res.Body } - defer f.Close() - r, err := decompressor(f) + r, err = decompressor(r) if err != nil { return err } @@ -218,21 +239,17 @@ const ( ) func decompressor(r io.Reader) (io.Reader, error) { - ra, ok := r.(io.ReaderAt) - if !ok { - return r, nil - } - var buf [3]byte - _, err := ra.ReadAt(buf[:], 0) + br := bufio.NewReader(r) + buf, err := br.Peek(3) if err != nil { return nil, err } switch { case bytes.Compare(buf[:2], []byte(gzipMagic)) == 0: - return gzip.NewReader(r) + return gzip.NewReader(br) case bytes.Compare(buf[:3], []byte(b2zipMagic)) == 0: - return bzip2.NewReader(r), nil + return bzip2.NewReader(br), nil default: - return r, nil + return br, nil } } From 86bf7e9e6aad200a0850f5a92e35352b52700b97 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 8 Aug 2014 22:21:42 +0930 Subject: [PATCH 13/15] Remove now-irrelevant failing test --- cayley_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cayley_test.go b/cayley_test.go index ce382b2..d108f85 100644 --- a/cayley_test.go +++ b/cayley_test.go @@ -480,13 +480,6 @@ var testDecompressor = []struct { expect: nil, readErr: bzip2.StructuralError("invalid compression level"), }, - { - message: "gzip input without ReadAt", - input: reader{strings.NewReader("\x1f\x8bcayley data\n")}, - err: nil, - expect: []byte("\x1f\x8bcayley data\n"), - readErr: nil, - }, } func TestDecompressor(t *testing.T) { From 9263b05d75ee8632829f0f1e777407e3e6f6a4a3 Mon Sep 17 00:00:00 2001 From: kortschak Date: Sat, 9 Aug 2014 10:09:43 +0930 Subject: [PATCH 14/15] Use cquads in the web UI data loader Fixes issue #106. --- http/write.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/http/write.go b/http/write.go index f810941..54e5537 100644 --- a/http/write.go +++ b/http/write.go @@ -26,7 +26,7 @@ import ( "github.com/julienschmidt/httprouter" "github.com/google/cayley/quad" - "github.com/google/cayley/quad/nquads" + "github.com/google/cayley/quad/cquads" ) func ParseJsonToTripleList(jsonBody []byte) ([]quad.Quad, error) { @@ -78,7 +78,8 @@ func (api *Api) ServeV1WriteNQuad(w http.ResponseWriter, r *http.Request, params blockSize = int64(api.config.LoadSize) } - dec := nquads.NewDecoder(formFile) + // TODO(kortschak) Make this configurable from the web UI. + dec := cquads.NewDecoder(formFile) var ( n int From 11c3cd17c6c978cbe656e80013638e7d894c068d Mon Sep 17 00:00:00 2001 From: Barak Michener Date: Sat, 9 Aug 2014 01:23:23 -0400 Subject: [PATCH 15/15] Fix cayley load command --- cayley.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cayley.go b/cayley.go index 3d7a157..7e58749 100644 --- a/cayley.go +++ b/cayley.go @@ -139,7 +139,7 @@ func main() { if err != nil { break } - err = load(ts, cfg, "", *tripleType) + err = load(ts, cfg, *tripleFile, *tripleType) if err != nil { break }