145 lines
4.4 KiB
Ruby
145 lines
4.4 KiB
Ruby
# Mercury tests, as well the quicksilver and liquidmetal algorithms which we
|
|
# sometimes use for comparison
|
|
|
|
require 'mercury'
|
|
|
|
# Fuzzy matching algorithm from Quicksilver, the OS X tool
|
|
def qs_score(entry, abbrev)
|
|
return 0.9 if abbrev.length == 0
|
|
return 0.0 if abbrev.length > entry.length
|
|
|
|
abbrev.length.downto(1) do |i|
|
|
sub_abbrev = abbrev[0...i]
|
|
index = entry.index(sub_abbrev)
|
|
|
|
next if index.nil?
|
|
next if index + sub_abbrev.length > entry.length
|
|
|
|
next_entry = entry[index+sub_abbrev.length..-1]
|
|
|
|
next_abbrev = i >= abbrev.length ? "" : abbrev[i..-1]
|
|
|
|
remaining_score = qs_score(next_entry, next_abbrev)
|
|
|
|
if remaining_score > 0
|
|
score = entry.length - next_entry.length
|
|
|
|
if index != 0
|
|
c = entry[index - 1]
|
|
word_boundaries = " \t/._"
|
|
if word_boundaries.include?(c)
|
|
for j in 0...(index-1)
|
|
c = entry[j]
|
|
score -= word_boundaries.include?(c) ? 1 : 0.15
|
|
end
|
|
else
|
|
score -= index
|
|
end
|
|
end
|
|
|
|
score += remaining_score * next_entry.length
|
|
score /= entry.length
|
|
return score
|
|
end
|
|
end
|
|
return 0.0
|
|
end
|
|
|
|
# Port of Ryan McGeary's LiquidMetal fuzzy matching algorithm found at:
|
|
# http://github.com/rmm5t/liquidmetal/tree/master.
|
|
class LiquidMetal
|
|
@@SCORE_NO_MATCH = 0.0
|
|
@@SCORE_MATCH = 1.0
|
|
@@SCORE_TRAILING = 0.8
|
|
@@SCORE_TRAILING_BUT_STARTED = 0.90
|
|
@@SCORE_BUFFER = 0.85
|
|
|
|
def self.score(string, abbrev)
|
|
return @@SCORE_TRAILING if abbrev.empty?
|
|
return @@SCORE_NO_MATCH if abbrev.length > string.length
|
|
|
|
scores = buildScoreArray(string, abbrev)
|
|
|
|
sum = scores.inject { |a, b| a + b }
|
|
|
|
return sum / scores.length;
|
|
end
|
|
|
|
def self.buildScoreArray(string, abbrev)
|
|
scores = Array.new(string.length)
|
|
lower = string.downcase()
|
|
|
|
lastIndex = -1
|
|
started = false
|
|
|
|
abbrev.downcase().each_byte do |c|
|
|
index = lower.index(c, lastIndex + 1)
|
|
return scores.fill(@@SCORE_NO_MATCH, 0..-1) if index.nil?
|
|
started = true if index == 0
|
|
|
|
if index > 0 and " \t/._-".include?(string[index - 1])
|
|
scores[index - 1] = @@SCORE_MATCH
|
|
scores.fill(@@SCORE_BUFFER, (lastIndex + 1)...(index - 1))
|
|
elsif string[index] >= "A"[0] and string[index] <= "Z"[0]
|
|
scores.fill(@@SCORE_BUFFER, (lastIndex + 1)...index)
|
|
else
|
|
scores.fill(@@SCORE_NO_MATCH, (lastIndex + 1)...index)
|
|
end
|
|
|
|
scores[index] = @@SCORE_MATCH
|
|
lastIndex = index
|
|
end
|
|
|
|
trailing_score = started ? @@SCORE_TRAILING_BUT_STARTED : @@SCORE_TRAILING
|
|
scores.fill(trailing_score, lastIndex + 1)
|
|
return scores
|
|
end
|
|
end
|
|
|
|
def print_score(entry, abbrev)
|
|
old_score = LiquidMetal.score(entry, abbrev)
|
|
new_score = Mercury.score(entry, abbrev)
|
|
|
|
puts "%-25s %-10s %.4f %.4f" % [entry, abbrev, old_score, new_score]
|
|
end
|
|
|
|
def test(high_entry, low_entry, abbrev)
|
|
high = Mercury.score(high_entry, abbrev)
|
|
low = Mercury.score(low_entry, abbrev)
|
|
puts "%-8s %-10s %-25s %.4f %-25s %.4f" % [high > low ? "PASS" : "FAIL", abbrev,
|
|
high_entry, high, low_entry, low]
|
|
end
|
|
|
|
print_score("phone numbers.txt", "phnb")
|
|
print_score("phone numbers.txt", "pht")
|
|
print_score("poohead.txt", "pht")
|
|
print_score("poohead.txt", "poo")
|
|
print_score("poohead.txt", "pood")
|
|
print_score("foo.lua", "lua")
|
|
print_score("lua-expression.h", "lua")
|
|
print_score("query-dispatcher.h", "qudisp")
|
|
print_score("query-dispatcher.cc", "qudisp")
|
|
print_score("query-dispatcher.h", "qudisph")
|
|
print_score("query-dispatcher.cc", "qudisph")
|
|
print_score("query-dispatczer.h", "qudisph")
|
|
print_score("query-dispatczer.h", "qudisp")
|
|
print_score("aaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaa")
|
|
print_score("query-executor.h", "exec")
|
|
print_score("query-executor.cc", "exec")
|
|
print_score("tablet-executor-node.h", "ten")
|
|
print_score("materialize-executor-node.h", "ten")
|
|
|
|
test("protocoltype.go", "serverstatus.proto", "proto")
|
|
test("protocoltype.go", "test.proto", "proto")
|
|
test("protocoltype.go", "test.proto", "pr")
|
|
test("protocoltype.go", "test.proto", "p")
|
|
test("protocoltype.go", "test.proto", "poto")
|
|
test("query-dispatcher.h", "query-dispatcher.cc", "qudisph")
|
|
test("query-plan.h", "query-dispatcher.h", "qup")
|
|
test("phone numbers.txt", "poohead.txt", "pht")
|
|
test("tokyo.txt", "abcde.txt", "t")
|
|
test("tablet-executor-node.h", "materialize-executor-node.h", "ten")
|
|
test("tent.txt", "tablet-executor-node.h", "ten")
|
|
test("tent.txt", "tablet-executor-node.h", "te")
|
|
test("ad", "m-a", "a")
|
|
test("a", "b", "A")
|