Update normalize.jl
parent
5c8109833c
commit
ac6d315dc9
|
@ -3,7 +3,8 @@
|
||||||
"""
|
"""
|
||||||
Normalized(dist::Union{StringSemiMetric, StringMetric})
|
Normalized(dist::Union{StringSemiMetric, StringMetric})
|
||||||
|
|
||||||
Creates a normalized distance. The normalized distance always returns a Float64 between 0.0 and 1.0 (or `missing` if one of the arguments is missing)
|
Creates a normalized distance. The normalized distance always returns a Float64 between 0.0 and 1.0 (or `missing` if one of the arguments is missing).
|
||||||
|
Calling a `Normalized` distance accepts a keyword argument `max_dist` that defaults to 1.0. The call returns 1.0 whenever the computed normalized distance is higher than `max_dist`.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
```julia-repl
|
```julia-repl
|
||||||
|
@ -20,7 +21,7 @@ struct Normalized{T <: Union{StringSemiMetric, StringMetric}} <: StringSemiMetri
|
||||||
end
|
end
|
||||||
Normalized(dist::Normalized) = dist
|
Normalized(dist::Normalized) = dist
|
||||||
|
|
||||||
# this basically says that all distances are considered to be normalized by default
|
# Consider all distances to be normalized by default
|
||||||
function (dist::Normalized)(s1, s2; max_dist = 1.0)
|
function (dist::Normalized)(s1, s2; max_dist = 1.0)
|
||||||
out = dist.dist(s1, s2; max_dist = max_dist)
|
out = dist.dist(s1, s2; max_dist = max_dist)
|
||||||
max_dist !== nothing && out > max_dist && return 1.0
|
max_dist !== nothing && out > max_dist && return 1.0
|
||||||
|
@ -29,9 +30,9 @@ end
|
||||||
|
|
||||||
function (dist::Normalized{<:Union{Hamming, DamerauLevenshtein}})(s1, s2; max_dist = 1.0)
|
function (dist::Normalized{<:Union{Hamming, DamerauLevenshtein}})(s1, s2; max_dist = 1.0)
|
||||||
(s1 === missing) | (s2 === missing) && return missing
|
(s1 === missing) | (s2 === missing) && return missing
|
||||||
isempty(s1) && isempty(s2) && return 0.0
|
|
||||||
s1, s2 = reorder(s1, s2)
|
s1, s2 = reorder(s1, s2)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
|
len2 == 0 && return 0.0
|
||||||
out = dist.dist(s1, s2) / len2
|
out = dist.dist(s1, s2) / len2
|
||||||
max_dist !== nothing && out > max_dist && return 1.0
|
max_dist !== nothing && out > max_dist && return 1.0
|
||||||
return out
|
return out
|
||||||
|
@ -39,9 +40,9 @@ end
|
||||||
|
|
||||||
function (dist::Normalized{<:Union{Levenshtein, OptimalStringAlignement}})(s1, s2; max_dist = 1.0)
|
function (dist::Normalized{<:Union{Levenshtein, OptimalStringAlignement}})(s1, s2; max_dist = 1.0)
|
||||||
(s1 === missing) | (s2 === missing) && return missing
|
(s1 === missing) | (s2 === missing) && return missing
|
||||||
isempty(s1) && isempty(s2) && return 0.0
|
|
||||||
s1, s2 = reorder(s1, s2)
|
s1, s2 = reorder(s1, s2)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
|
len2 == 0 && return 0.0
|
||||||
if max_dist == 1.0
|
if max_dist == 1.0
|
||||||
d = dist.dist(s1, s2)
|
d = dist.dist(s1, s2)
|
||||||
else
|
else
|
||||||
|
@ -57,7 +58,7 @@ function (dist::Normalized{<:AbstractQGramDistance})(s1, s2; max_dist = 1.0)
|
||||||
# When string length < q for qgram distance, returns 0.0 if s1 == s2 and 1.0 otherwise
|
# When string length < q for qgram distance, returns 0.0 if s1 == s2 and 1.0 otherwise
|
||||||
s1, s2 = reorder(s1, s2)
|
s1, s2 = reorder(s1, s2)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
len1 <= dist.dist.q - 1 && return convert(Float64, s1 != s2)
|
len1 <= dist.dist.q - 1 && return Float64(s1 != s2)
|
||||||
if dist.dist isa QGram
|
if dist.dist isa QGram
|
||||||
out = dist.dist(s1, s2) / (len1 + len2 - 2 * dist.dist.q + 2)
|
out = dist.dist(s1, s2) / (len1 + len2 - 2 * dist.dist.q + 2)
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue