Update normalize.jl
parent
5c8109833c
commit
ac6d315dc9
|
@ -3,7 +3,8 @@
|
|||
"""
|
||||
Normalized(dist::Union{StringSemiMetric, StringMetric})
|
||||
|
||||
Creates a normalized distance. The normalized distance always return a Float64 between 0.0 and 1.0 (or a missing if one of the argument is missing)
|
||||
Creates a normalized distance. The normalized distance always returns a Float64 between 0.0 and 1.0 (or `missing` if one of the arguments is missing).
|
||||
A Normalized Distance has a keyword argument `max_dist` that defaults to 1.0. It returns 1.0 if the true distance is higher than `max_dist`.
|
||||
|
||||
### Examples
|
||||
```julia-repl
|
||||
|
@ -20,7 +21,7 @@ struct Normalized{T <: Union{StringSemiMetric, StringMetric}} <: StringSemiMetri
|
|||
end
|
||||
# Wrapping a distance that is already `Normalized` is a no-op: the same
# object is returned instead of nesting a second `Normalized` around it.
function Normalized(dist::Normalized)
    return dist
end
|
||||
|
||||
# this basically says that all distances are considered to be normalized by default
|
||||
# Consider all distances to be normalized by default
|
||||
function (dist::Normalized)(s1, s2; max_dist = 1.0)
|
||||
out = dist.dist(s1, s2; max_dist = max_dist)
|
||||
max_dist !== nothing && out > max_dist && return 1.0
|
||||
|
@ -29,9 +30,9 @@ end
|
|||
|
||||
function (dist::Normalized{<:Union{Hamming, DamerauLevenshtein}})(s1, s2; max_dist = 1.0)
|
||||
(s1 === missing) | (s2 === missing) && return missing
|
||||
isempty(s1) && isempty(s2) && return 0.0
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len2 == 0 && return 0.0
|
||||
out = dist.dist(s1, s2) / len2
|
||||
max_dist !== nothing && out > max_dist && return 1.0
|
||||
return out
|
||||
|
@ -39,9 +40,9 @@ end
|
|||
|
||||
function (dist::Normalized{<:Union{Levenshtein, OptimalStringAlignement}})(s1, s2; max_dist = 1.0)
|
||||
(s1 === missing) | (s2 === missing) && return missing
|
||||
isempty(s1) && isempty(s2) && return 0.0
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len2 == 0 && return 0.0
|
||||
if max_dist == 1.0
|
||||
d = dist.dist(s1, s2)
|
||||
else
|
||||
|
@ -57,7 +58,7 @@ function (dist::Normalized{<:AbstractQGramDistance})(s1, s2; max_dist = 1.0)
|
|||
# When the string length is < q for a qgram distance, return `s1 != s2` as a Float64 (0.0 if the strings are equal, 1.0 otherwise)
|
||||
s1, s2 = reorder(s1, s2)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 <= dist.dist.q - 1 && return convert(Float64, s1 != s2)
|
||||
len1 <= dist.dist.q - 1 && return Float64(s1 != s2)
|
||||
if dist.dist isa QGram
|
||||
out = dist.dist(s1, s2) / (len1 + len2 - 2 * dist.dist.q + 2)
|
||||
else
|
||||
|
|
Loading…
Reference in New Issue