allow more than strings
parent
276d022df6
commit
9f30c134cf
|
@ -17,9 +17,13 @@ include("find.jl")
|
|||
##
|
||||
##############################################################################
|
||||
|
||||
# Propagate `missing`: any StringDistance evaluated with a missing first
# argument and a string second argument returns `missing`.
function evaluate(::StringDistance, ::Missing, ::AbstractString)
    return missing
end
|
||||
# Propagate `missing`: any StringDistance evaluated with a string first
# argument and a missing second argument returns `missing`.
function evaluate(::StringDistance, ::AbstractString, ::Missing)
    return missing
end
|
||||
# Propagate `missing`: any StringDistance evaluated on two missing arguments
# returns `missing`.
function evaluate(::StringDistance, ::Missing, ::Missing)
    return missing
end
|
||||
# Propagate `missing`: a QGramDistance evaluated with a missing first argument
# and a string second argument returns `missing`.
function evaluate(::QGramDistance, ::Missing, ::AbstractString)
    return missing
end
|
||||
# Propagate `missing`: a QGramDistance evaluated with a string first argument
# and a missing second argument returns `missing`.
function evaluate(::QGramDistance, ::AbstractString, ::Missing)
    return missing
end
|
||||
|
||||
# Propagate `missing`: RatcliffObershelp evaluated with a missing first
# argument and a string second argument returns `missing`.
function evaluate(::RatcliffObershelp, ::Missing, ::AbstractString)
    return missing
end
|
||||
# Propagate `missing`: RatcliffObershelp evaluated with a string first
# argument and a missing second argument returns `missing`.
function evaluate(::RatcliffObershelp, ::AbstractString, ::Missing)
    return missing
end
|
||||
|
||||
|
||||
|
||||
# Propagate `missing`: comparing a missing first argument against a string
# returns `missing`. The `min_score` keyword is accepted but not used here.
function compare(::Missing, ::AbstractString, ::StringDistance; min_score = 0.0)
    return missing
end
|
||||
# Propagate `missing`: comparing a string against a missing second argument
# returns `missing`. The `min_score` keyword is accepted but not used here.
function compare(::AbstractString, ::Missing, ::StringDistance; min_score = 0.0)
    return missing
end
|
||||
|
|
|
@ -15,7 +15,8 @@ struct Jaro <: StringDistance end
|
|||
|
||||
|
||||
## http://alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html
|
||||
function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
|
||||
function evaluate(dist::Jaro, s1, s2)
|
||||
(ismissing(s1) | ismissing(s2)) && return missing
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
# if both are empty, m = 0 so should be 1.0 according to wikipedia.
|
||||
|
@ -85,7 +86,8 @@ struct Levenshtein <: StringDistance end
|
|||
# Return max_dist + 1 if the distance is higher than max_dist
|
||||
# This makes it possible to differentiate a distance equal to max_dist vs strictly higher
|
||||
# This is important for find_all
|
||||
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString; max_dist = nothing)
|
||||
function evaluate(dist::Levenshtein, s1, s2; max_dist = nothing)
|
||||
(ismissing(s1) | ismissing(s2)) && return missing
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1
|
||||
|
@ -138,7 +140,8 @@ required to change one string into the other.
|
|||
struct DamerauLevenshtein <: StringDistance end
|
||||
|
||||
## http://blog.softwx.net/2015/01/optimizing-damerau-levenshtein_15.html
|
||||
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString; max_dist = nothing)
|
||||
function evaluate(dist::DamerauLevenshtein, s1, s2; max_dist = nothing)
|
||||
(ismissing(s1) | ismissing(s2)) && return missing
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1
|
||||
|
|
15
src/utils.jl
15
src/utils.jl
|
@ -10,17 +10,20 @@ Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
|
|||
# Forward `ncodeunits` to the value stored in the wrapper's `s` field.
function Base.ncodeunits(s::StringWithLength)
    return ncodeunits(s.s)
end
|
||||
# Forward the index validity check to the value stored in the wrapper's `s` field.
function Base.isvalid(s::StringWithLength, i::Int)
    return isvalid(s.s, i)
end
|
||||
|
||||
|
||||
# Wrap both strings with `string_with_length`, then return them ordered by
# length: the shorter one first, and `(s1, s2)` unchanged on a tie.
#
# The earlier form returned from both branches of an `if`/`else` and then
# repeated the same comparison in a trailing expression that could never run;
# the single ternary below is the only remaining code path.
function reorder(s1::AbstractString, s2::AbstractString)
    s1 = string_with_length(s1)
    s2 = string_with_length(s2)
    return (length(s1) <= length(s2)) ? (s1, s2) : (s2, s1)
end
|
||||
|
||||
function common_prefix(s1::AbstractString, s2::AbstractString)
|
||||
# Return the two arguments as a tuple ordered by `length`: the shorter one
# first. When the lengths are equal the original order `(s1, s2)` is kept.
function reorder(s1, s2)
    if length(s1) <= length(s2)
        return s1, s2
    else
        return s2, s1
    end
end
|
||||
|
||||
|
||||
|
||||
function common_prefix(s1, s2)
|
||||
x1 = iterate(s1)
|
||||
x2 = iterate(s2)
|
||||
l = 0
|
||||
|
|
Loading…
Reference in New Issue