pull/17/head
matthieugomez 2019-08-17 16:12:41 -04:00
parent 243bcaff24
commit 9e73346c07
3 changed files with 9 additions and 4 deletions

View File

@ -135,6 +135,7 @@ function compare(s1::AbstractString, s2::AbstractString, dist::TokenSet)
s0 = join(v0, " ")
s1 = join(v1, " ")
s2 = join(v2, " ")
isempty(s0) && return compare(s1, s2, dist.dist)
max(compare(s0, s1, dist.dist),
compare(s0, s2, dist.dist),
compare(s1, s2, dist.dist))

View File

@ -194,7 +194,8 @@ struct RatcliffObershelp <: PreMetric end
"""
    evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString)

Return the Ratcliff/Obershelp distance between `s1` and `s2` as a floating-point value.

The result is `1.0 - 2 * n_matched / (length(s1) + length(s2))`, where `n_matched`
sums the last component of every entry returned by `matching_blocks(s1, s2)`
(presumably the length of each matched block -- confirm against `matching_blocks`).

When both strings are empty the distance is defined as `0.0`, which avoids the
`0 / 0` division that would otherwise produce `NaN`.
"""
function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString)
    # Keep the broadcast form: it yields 0 for an empty block list, whereas
    # `sum(last, ...)` would throw on an empty collection.
    n_matched = sum(last.(matching_blocks(s1, s2)))
    total_length = length(s1) + length(s2)
    # Return a floating-point zero (not the Int literal `0`) so that both
    # branches share a single return type.
    total_length == 0 && return 0.0
    return 1.0 - 2 * n_matched / total_length
end
function matching_blocks(s1::AbstractString, s2::AbstractString)

View File

@ -68,10 +68,13 @@ s = "HSINCHUANG"
@test compare("mariners vs angels", "los angeles angels of anaheim at seattle mariners", TokenSet(RatcliffObershelp())) ≈ 1.0 - 0.09090909090909094
@test compare("New York Mets vs Atlanta Braves", "", RatcliffObershelp()) ≈ 0.0
@test compare("New York Mets vs Atlanta Braves", "", TokenSort(RatcliffObershelp())) ≈ 0.0
# ADD AGAIN
#@test compare("mariners vs angels", "", TokenSet(RatcliffObershelp())) ≈ 0.0
@test compare("mariners vs angels", "", TokenSet(RatcliffObershelp())) ≈ 0.0