allow more than strings

pull/22/head
matthieugomez 2020-02-07 08:31:00 -05:00
parent 276d022df6
commit 9f30c134cf
3 changed files with 22 additions and 12 deletions

View File

@ -17,9 +17,13 @@ include("find.jl")
##
##############################################################################
# Missing-value propagation: each method below returns `missing` when at least one of
# the two compared values is `missing`.
evaluate(::StringDistance, ::Missing, ::AbstractString) = missing
evaluate(::StringDistance, ::AbstractString, ::Missing) = missing
evaluate(::StringDistance, ::Missing, ::Missing) = missing
# The same rule, declared specifically for the `QGramDistance` and `RatcliffObershelp`
# distance types (one `missing` argument paired with a string).
evaluate(::QGramDistance, ::Missing, ::AbstractString) = missing
evaluate(::QGramDistance, ::AbstractString, ::Missing) = missing
evaluate(::RatcliffObershelp, ::Missing, ::AbstractString) = missing
evaluate(::RatcliffObershelp, ::AbstractString, ::Missing) = missing
# `compare` with one `missing` argument also returns `missing`; the `min_score` keyword
# is accepted but not used by these methods.
# NOTE(review): no `compare(::Missing, ::Missing, ...)` method is visible in this
# excerpt -- confirm whether it is defined elsewhere.
compare(::Missing, ::AbstractString, ::StringDistance; min_score = 0.0) = missing
compare(::AbstractString, ::Missing, ::StringDistance; min_score = 0.0) = missing

View File

@ -15,7 +15,8 @@ struct Jaro <: StringDistance end
## http://alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html
function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
function evaluate(dist::Jaro, s1, s2)
(ismissing(s1) | ismissing(s2)) && return missing
s1, s2 = reorder(s1, s2)
len1, len2 = length(s1), length(s2)
# if both are empty, m = 0 so should be 1.0 according to wikipedia.
@ -85,7 +86,8 @@ struct Levenshtein <: StringDistance end
# Return max_dist +1 if distance higher than max_dist
# This makes it possible to differentiate a distance equal to max_dist vs strictly higher
# This is important for find_all
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString; max_dist = nothing)
function evaluate(dist::Levenshtein, s1, s2; max_dist = nothing)
(ismissing(s1) | ismissing(s2)) && return missing
s1, s2 = reorder(s1, s2)
len1, len2 = length(s1), length(s2)
max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1
@ -138,7 +140,8 @@ required to change one string into the other.
struct DamerauLevenshtein <: StringDistance end
## http://blog.softwx.net/2015/01/optimizing-damerau-levenshtein_15.html
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString; max_dist = nothing)
function evaluate(dist::DamerauLevenshtein, s1, s2; max_dist = nothing)
(ismissing(s1) | ismissing(s2)) && return missing
s1, s2 = reorder(s1, s2)
len1, len2 = length(s1), length(s2)
max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1

View File

@ -10,17 +10,20 @@ Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
Base.ncodeunits(s::StringWithLength) = ncodeunits(s.s)
Base.isvalid(s::StringWithLength, i::Int) = isvalid(s.s, i)
"""
    reorder(s1::AbstractString, s2::AbstractString)

Wrap both strings with `string_with_length` and return them as a tuple ordered so that
the one with the smaller `length` comes first. When both lengths are equal, the
argument order is kept.
"""
function reorder(s1::AbstractString, s2::AbstractString)
    s1 = string_with_length(s1)
    s2 = string_with_length(s2)
    # Shorter string first; ties keep the original order.
    return length(s1) <= length(s2) ? (s1, s2) : (s2, s1)
end
function common_prefix(s1::AbstractString, s2::AbstractString)
# Generic fallback: return the two collections as a tuple with the one of smaller
# `length` first. Equal lengths keep the argument order.
function reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    if len1 > len2
        return s2, s1
    end
    return s1, s2
end
function common_prefix(s1, s2)
x1 = iterate(s1)
x2 = iterate(s2)
l = 0