From aed1fc2ad8d1125c7a267e57b3e3324cc5c9e1da Mon Sep 17 00:00:00 2001 From: matthieugomez Date: Fri, 23 Oct 2020 10:26:33 -0700 Subject: [PATCH] Revert "add back Hamming" This reverts commit 6e1013d49cd45019e8568586c2163e70daa2e67e. --- README.md | 1 - src/StringDistances.jl | 3 +-- src/distances/edit.jl | 13 ------------- src/normalize.jl | 12 +----------- test/distances.jl | 8 -------- test/modifiers.jl | 4 ---- 6 files changed, 2 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 92e3167..22f63ac 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ Distances are defined for `AbstractStrings`, and any iterator that define `lengt The available distances are: - Edit Distances - - [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) `Hamming()` - [Jaro Distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) `Jaro()` - [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance) `Levenshtein()` - [Damerau-Levenshtein Distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) `DamerauLevenshtein()` diff --git a/src/StringDistances.jl b/src/StringDistances.jl index 341f52a..f135480 100755 --- a/src/StringDistances.jl +++ b/src/StringDistances.jl @@ -7,7 +7,7 @@ include("distances/edit.jl") include("distances/qgram.jl") include("normalize.jl") -const StringDistance = Union{Hamming, Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize} +const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize} # Distances API Distances.result_type(dist::StringDistance, s1, s2) = typeof(dist("", "")) include("find.jl") @@ -21,7 +21,6 @@ include("find.jl") export StringDistance, -Hamming, Levenshtein, DamerauLevenshtein, Jaro, diff --git a/src/distances/edit.jl b/src/distances/edit.jl index 0f3a69b..7c572ab 100755 --- a/src/distances/edit.jl +++ b/src/distances/edit.jl @@ -1,16 +1,3 @@ - -function (dist::Hamming)(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, max_dist::Union{Integer, Nothing} = nothing) - ((s1 === missing) | (s2 === missing)) && return missing - current = abs(length(s2) - length(s1)) - max_dist !== nothing && current > max_dist && return max_dist + 1 - for (ch1, ch2) in zip(s1, s2) - current += ch1 != ch2 - max_dist !== nothing && current > max_dist && return max_dist + 1 - end - return current -end - - """ Jaro() diff --git a/src/normalize.jl b/src/normalize.jl index cbef8bf..a2baade 100755 --- a/src/normalize.jl +++ b/src/normalize.jl @@ -12,18 +12,8 @@ normalize(dist::SemiMetric) = Normalize(dist) normalize(dist::Normalize) = dist -function (dist::Normalize{Hamming})(s1::AbstractString, s2::AbstractString; max_dist = 1.0) - s1, s2 = reorder(s1, s2) - len1, len2 = length(s1), length(s2) - len2 == 0 && return 1.0 - out = evaluate(dist, s1, s2, max_dist * len2) / len2 - out > max_dist ? 1.0 : out -end - - - # A normalized distance is between 0 and 1, and accept a third argument, max_dist. -function (dist::Normalize{<: Union{Hamming, Levenshtein, DamerauLevenshtein}})(s1, s2, max_dist = 1.0) +function (dist::Normalize{<: Union{Levenshtein, DamerauLevenshtein}})(s1, s2, max_dist = 1.0) ((s1 === missing) | (s2 === missing)) && return missing s1, s2 = reorder(s1, s2) len1, len2 = length(s1), length(s2) diff --git a/test/distances.jl b/test/distances.jl index 38d0c4f..e688b48 100644 --- a/test/distances.jl +++ b/test/distances.jl @@ -3,14 +3,6 @@ using StringDistances, Unicode, Test @testset "Distances" begin - @testset "Hamming" begin - @test evaluate(Hamming(), "martha", "marhta") ≈ 2 - @test evaluate(Hamming(), "es an ", " vs an") ≈ 6 - @test result_type(Hamming(), "hello", "world") == typeof(1) - @inferred evaluate(Hamming(), "", "") - @test ismissing(evaluate(Hamming(), "", missing)) - end - @testset "Jaro" begin @test evaluate(Jaro(), "martha", "marhta") ≈ 0.05555555555555547 @test evaluate(Jaro(), "es an ", " vs an") ≈ 0.2777777777777777 diff --git a/test/modifiers.jl b/test/modifiers.jl index db70973..1f960d6 100644 --- a/test/modifiers.jl +++ b/test/modifiers.jl @@ -16,10 +16,6 @@ using StringDistances, Unicode, Test @test compare("martha", "martha", Overlap(2)) ≈ 1.0 atol = 1e-4 @test compare("martha", "martha", SorensenDice(2)) ≈ 1.0 atol = 1e-4 - #Hamming - compare("aüa", "aua", Hamming()) ≈ 2/3 atol = 1e-4 - compare("aaua", "aa", Partial(Hamming())) ≈ 1.0 atol = 1e-4 - # Jaro compare("aüa", "aua", Jaro())