Revert "add back Hamming"

This reverts commit 6e1013d49c.
pull/38/head
matthieugomez 2020-10-23 10:26:33 -07:00
parent 6e1013d49c
commit aed1fc2ad8
6 changed files with 2 additions and 39 deletions

View File

@ -11,7 +11,6 @@ Distances are defined for `AbstractStrings`, and any iterator that define `lengt
The available distances are:
- Edit Distances
- [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) `Hamming()`
- [Jaro Distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) `Jaro()`
- [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance) `Levenshtein()`
- [Damerau-Levenshtein Distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) `DamerauLevenshtein()`

View File

@ -7,7 +7,7 @@ include("distances/edit.jl")
include("distances/qgram.jl")
include("normalize.jl")
const StringDistance = Union{Hamming, Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize}
const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize}
# Distances API
# The result type is obtained by evaluating the distance on two empty strings;
# the `s1` and `s2` arguments themselves are not inspected.
function Distances.result_type(dist::StringDistance, s1, s2)
    return typeof(dist("", ""))
end
include("find.jl")
@ -21,7 +21,6 @@ include("find.jl")
export
StringDistance,
Hamming,
Levenshtein,
DamerauLevenshtein,
Jaro,

View File

@ -1,16 +1,3 @@
# Hamming distance between `s1` and `s2`: the number of aligned positions whose
# characters differ, plus the difference between the two lengths.
# Returns `missing` when either argument is `missing`. When `max_dist` is given,
# counting stops as soon as the running total exceeds it and `max_dist + 1` is
# returned instead of the exact distance.
function (dist::Hamming)(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, max_dist::Union{Integer, Nothing} = nothing)
    (ismissing(s1) || ismissing(s2)) && return missing
    # `zip` below stops at the end of the shorter string, so the length gap is
    # counted up front.
    mismatches = abs(length(s2) - length(s1))
    if max_dist !== nothing && mismatches > max_dist
        return max_dist + 1
    end
    for (c1, c2) in zip(s1, s2)
        mismatches += (c1 != c2)
        if max_dist !== nothing && mismatches > max_dist
            return max_dist + 1
        end
    end
    return mismatches
end
"""
Jaro()

View File

@ -12,18 +12,8 @@ normalize(dist::SemiMetric) = Normalize(dist)
# A distance that is already wrapped in `Normalize` is returned unchanged.
function normalize(dist::Normalize)
    return dist
end
# Hamming distance divided by `length(s2)`, where `s2` is taken after `reorder`.
# `max_dist` is a keyword argument here. Returns 1.0 when `s2` is empty after
# reordering, or when the scaled distance exceeds `max_dist`.
# NOTE(review): `reorder` is defined elsewhere; presumably it places the longer
# string second so that the divisor is the larger length — confirm.
# NOTE(review): `evaluate` is handed the `Normalize` wrapper itself together with
# a bound that is a Float64 under the default `max_dist`; verify that this
# reaches the raw Hamming distance, whose bound is annotated `Integer`.
function (dist::Normalize{Hamming})(s1::AbstractString, s2::AbstractString; max_dist = 1.0)
    s1, s2 = reorder(s1, s2)
    len2 = length(s2)
    len2 == 0 && return 1.0
    out = evaluate(dist, s1, s2, max_dist * len2) / len2
    return out > max_dist ? 1.0 : out
end
# A normalized distance is between 0 and 1, and accepts a third argument, max_dist.
function (dist::Normalize{<: Union{Hamming, Levenshtein, DamerauLevenshtein}})(s1, s2, max_dist = 1.0)
function (dist::Normalize{<: Union{Levenshtein, DamerauLevenshtein}})(s1, s2, max_dist = 1.0)
((s1 === missing) | (s2 === missing)) && return missing
s1, s2 = reorder(s1, s2)
len1, len2 = length(s1), length(s2)

View File

@ -3,14 +3,6 @@ using StringDistances, Unicode, Test
@testset "Distances" begin
@testset "Hamming" begin
    # Hamming distances are integer counts, so they are compared exactly.
    @test evaluate(Hamming(), "martha", "marhta") == 2
    @test evaluate(Hamming(), "es an ", " vs an") == 6
    @test result_type(Hamming(), "hello", "world") == typeof(1)
    # The return type must be inferable for plain string inputs.
    @inferred evaluate(Hamming(), "", "")
    # A `missing` argument propagates to the result.
    @test ismissing(evaluate(Hamming(), "", missing))
end
@testset "Jaro" begin
# Jaro distances are floating point, so they are compared approximately.
@test evaluate(Jaro(), "martha", "marhta") ≈ 0.05555555555555547
@test evaluate(Jaro(), "es an ", " vs an") ≈ 0.2777777777777777

View File

@ -16,10 +16,6 @@ using StringDistances, Unicode, Test
@test compare("martha", "martha", Overlap(2)) ≈ 1.0 atol = 1e-4
@test compare("martha", "martha", SorensenDice(2)) ≈ 1.0 atol = 1e-4
# Hamming
# One of three characters differs, so the expected similarity is 2/3.
@test compare("aüa", "aua", Hamming()) ≈ 2/3 atol = 1e-4
@test compare("aaua", "aa", Partial(Hamming())) ≈ 1.0 atol = 1e-4
# Jaro
compare("aüa", "aua", Jaro())