diff --git a/graph/iterator.go b/graph/iterator.go
index 17bcec0..67a79ba 100644
--- a/graph/iterator.go
+++ b/graph/iterator.go
@@ -237,6 +237,7 @@ const (
 	Not
 	Optional
 	Materialize
+	Unique
 )
 
 var (
@@ -258,6 +259,7 @@ var (
 		"not",
 		"optional",
 		"materialize",
+		"unique",
 	}
 )
 
diff --git a/graph/iterator/unique_iterator.go b/graph/iterator/unique_iterator.go
new file mode 100644
index 0000000..930b136
--- /dev/null
+++ b/graph/iterator/unique_iterator.go
@@ -0,0 +1,194 @@
+package iterator
+
+import (
+	"github.com/google/cayley/graph"
+)
+
+// Unique iterator removes duplicate values from its subiterator. Every value
+// returned by Next is recorded in seen so that later repeats are skipped.
+type Unique struct {
+	uid      uint64
+	tags     graph.Tagger
+	subIt    graph.Iterator
+	result   graph.Value
+	runstats graph.IteratorStats
+	err      error
+	seen     map[graph.Value]bool
+}
+
+// NewUnique returns a Unique iterator wrapping subIt, with a UID taken from
+// NextUID and an empty set of seen values.
+func NewUnique(subIt graph.Iterator) *Unique {
+	return &Unique{
+		uid:   NextUID(),
+		subIt: subIt,
+		seen:  make(map[graph.Value]bool),
+	}
+}
+
+// UID returns the identifier obtained from NextUID when it was created.
+func (it *Unique) UID() uint64 {
+	return it.uid
+}
+
+// Reset resets the subiterator and the iterator itself: the current result is
+// cleared and the set of seen values is emptied. The stored error is left
+// untouched.
+func (it *Unique) Reset() {
+	it.result = nil
+	it.subIt.Reset()
+	it.seen = make(map[graph.Value]bool)
+}
+
+// Tagger returns a pointer to the tagger holding this iterator's tags.
+func (it *Unique) Tagger() *graph.Tagger {
+	return &it.tags
+}
+
+// TagResults writes the current result into dst under each of this iterator's
+// tags, copies its fixed tags, and then lets the subiterator add its own.
+func (it *Unique) TagResults(dst map[string]graph.Value) {
+	for _, tag := range it.tags.Tags() {
+		dst[tag] = it.Result()
+	}
+
+	for tag, value := range it.tags.Fixed() {
+		dst[tag] = value
+	}
+
+	if it.subIt != nil {
+		it.subIt.TagResults(dst)
+	}
+}
+
+// Clone returns a new Unique over a clone of the subiterator and copies this
+// iterator's tags via CopyFrom. The clone starts with no seen values.
+func (it *Unique) Clone() graph.Iterator {
+	uniq := NewUnique(it.subIt.Clone())
+	uniq.tags.CopyFrom(it)
+	return uniq
+}
+
+// SubIterators returns a slice of the sub iterators. Unique has exactly one:
+// the iterator whose values are being deduplicated.
+func (it *Unique) SubIterators() []graph.Iterator {
+	return []graph.Iterator{it.subIt}
+}
+
+// ResultTree returns a tree rooted at the current result with the
+// subiterator's tree attached beneath it.
+//
+// Deprecated: the original source marks this method deprecated without naming
+// a replacement.
+func (it *Unique) ResultTree() *graph.ResultTree {
+	tree := graph.NewResultTree(it.Result())
+	tree.AddSubtree(it.subIt.ResultTree())
+	return tree
+}
+
+// Next advances the subiterator, continuing until it returns a value which it
+// has not previously seen. When the subiterator is exhausted, its error (if
+// any) is saved for Err and Next reports false.
+func (it *Unique) Next() bool {
+	graph.NextLogIn(it)
+	it.runstats.Next++
+
+	for graph.Next(it.subIt) {
+		curr := it.subIt.Result()
+		if !it.seen[curr] {
+			it.result = curr
+			it.seen[curr] = true
+			return graph.NextLogOut(it, it.result, true)
+		}
+	}
+	it.err = it.subIt.Err()
+	return graph.NextLogOut(it, nil, false)
+}
+
+// Err returns the subiterator's error as recorded the last time Next reported
+// false.
+func (it *Unique) Err() error {
+	return it.err
+}
+
+// Result returns the value most recently stored by a successful Next.
+func (it *Unique) Result() graph.Value {
+	return it.result
+}
+
+// Contains checks whether the passed value is part of the primary iterator,
+// which is irrelevant for uniqueness.
+func (it *Unique) Contains(val graph.Value) bool {
+	graph.ContainsLogIn(it, val)
+	it.runstats.Contains++
+	return graph.ContainsLogOut(it, val, it.subIt.Contains(val))
+}
+
+// NextPath for unique always returns false. If we were to return multiple
+// paths, we'd no longer be a unique result, so we have to choose only the first
+// path that got us here. Unique is serious on this point.
+func (it *Unique) NextPath() bool {
+	return false
+}
+
+// Close drops the set of seen values and closes the subiterator.
+func (it *Unique) Close() error {
+	it.seen = nil
+	return it.subIt.Close()
+}
+
+// Type returns graph.Unique.
+func (it *Unique) Type() graph.Type { return graph.Unique }
+
+// Optimize optimizes the subiterator in place, swapping in its replacement if
+// there is one. Unique itself is never replaced, so it always reports false.
+func (it *Unique) Optimize() (graph.Iterator, bool) {
+	newIt, optimized := it.subIt.Optimize()
+	if optimized {
+		it.subIt = newIt
+	}
+	return it, false
+}
+
+// uniquenessFactor is the constant Stats uses to scale the subiterator's
+// NextCost up and its Size down.
+const uniquenessFactor = 2
+
+// Stats derives estimates from the subiterator's stats: NextCost is multiplied
+// by uniquenessFactor, Size is divided by it, and ContainsCost passes through
+// unchanged. The run counters come from this iterator's own runstats.
+func (it *Unique) Stats() graph.IteratorStats {
+	subStats := it.subIt.Stats()
+	return graph.IteratorStats{
+		NextCost:     subStats.NextCost * uniquenessFactor,
+		ContainsCost: subStats.ContainsCost,
+		Size:         subStats.Size / uniquenessFactor,
+		Next:         it.runstats.Next,
+		Contains:     it.runstats.Contains,
+		ContainsNext: it.runstats.ContainsNext,
+	}
+}
+
+// Size returns the Size computed by Stats. The second result is always false,
+// presumably signalling that the figure is an estimate rather than exact.
+func (it *Unique) Size() (int64, bool) {
+	return it.Stats().Size, false
+}
+
+// Describe returns a description of this iterator (UID, type, tags) with the
+// subiterator's description nested inside.
+func (it *Unique) Describe() graph.Description {
+	subIts := []graph.Description{
+		it.subIt.Describe(),
+	}
+
+	return graph.Description{
+		UID:       it.UID(),
+		Type:      it.Type(),
+		Tags:      it.tags.Tags(),
+		Iterators: subIts,
+	}
+}
+
+// Compile-time check that *Unique implements graph.Nexter.
+var _ graph.Nexter = (*Unique)(nil)
diff --git a/graph/iterator/unique_iterator_test.go b/graph/iterator/unique_iterator_test.go
new file mode 100644
index 0000000..1923f47
--- /dev/null
+++ b/graph/iterator/unique_iterator_test.go
@@ -0,0 +1,33 @@
+package iterator
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestUniqueIteratorBasics checks that Unique yields each value once, does so
+// again after Reset, and that Contains accepts every distinct value.
+func TestUniqueIteratorBasics(t *testing.T) {
+	allIt := NewFixed(Identity)
+	allIt.Add(1)
+	allIt.Add(2)
+	allIt.Add(3)
+	allIt.Add(3)
+	allIt.Add(2)
+
+	u := NewUnique(allIt)
+
+	expect := []int{1, 2, 3}
+	for i := 0; i < 2; i++ {
+		if got := iterated(u); !reflect.DeepEqual(got, expect) {
+			t.Errorf("Failed to iterate Unique correctly on repeat %d: got:%v expected:%v", i, got, expect)
+		}
+		u.Reset()
+	}
+
+	for _, v := range expect {
+		if !u.Contains(v) {
+			t.Errorf("Failed to find a correct value in the unique iterator.")
+		}
+	}
+}