improve support for missings

2019-12-12 09:38:20 -05:00 · 2019-12-12 09:38:20 -05:00 · 16cf5abb94
parent e0cc4f6bea
commit 16cf5abb94
10 changed files with 326 additions and 352 deletions
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@ -0,0 +1,24 @@
+name: CompatHelper
+
+on:
+  schedule:
+    - cron: '00 * * * *'
+
+jobs:
+  CompatHelper:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        julia-version: [1.2.0]
+        julia-arch: [x86]
+        os: [ubuntu-latest]
+    steps:
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: ${{ matrix.julia-version }}
+      - name: Pkg.add("CompatHelper")
+        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
+      - name: CompatHelper.main()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: julia -e 'using CompatHelper; CompatHelper.main()'
--- a/Project.toml
+++ b/Project.toml
@ -3,12 +3,18 @@ uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
 version = "0.5.0"

 [deps]
-DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"

 [compat]
-DataStructures = "0.17"
-Distances = "0.8"
 julia = "1"
+DataStructures = "0.14, 0.15, 0.16, 0.17"
+Distances = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
+
+
--- a/src/StringDistances.jl
+++ b/src/StringDistances.jl
@ -1,13 +1,17 @@
 module StringDistances

+
+
+using Distances
+import Distances: evaluate, result_type
+using DataStructures  # for SortedSet in TokenSet
+
 ##############################################################################
 ##
 ## Export
 ##
 ##############################################################################
-using DataStructures
-import Base: eltype, length, iterate, ==, hash, isless, convert, show, @deprecate
-import Distances: evaluate, Hamming, hamming, PreMetric, result_type, SemiMetric
+
 export
 evaluate,
 compare,
@ -46,7 +50,6 @@ function result_type(m::Union{Hamming, Jaro, Levenshtein, DamerauLevenshtein, Ra
    typeof(evaluate(m, oneunit(a), oneunit(b)))
 end

-
 end

 ##############################################################################
--- a/src/compare.jl
+++ b/src/compare.jl
@ -10,31 +10,34 @@

 compare returns a similarity score between the strings `s1` and `s2` based on the distance `dist`
 """
-function compare(s1::AbstractString, s2::AbstractString,  dist::Hamming; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::Hamming; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    len2 == 0 && return 1.0
    1.0 - evaluate(dist, s1, s2) / len2
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::Union{Jaro, RatcliffObershelp}; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::Union{Jaro, RatcliffObershelp}; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    1.0 - evaluate(dist, s1, s2)
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::AbstractQGramDistance; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::AbstractQGramDistance; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    # When string length < q for qgram distance, returns s1 == s2
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    len1 <= dist.q - 1 && return convert(Float64, s1 == s2)
    if typeof(dist) <: QGram
-        1 - evaluate(dist, s1, s2) / (len1 + len2 - 2 * dist.q + 2)
+        1.0 - evaluate(dist, s1, s2) / (len1 + len2 - 2 * dist.q + 2)
    else
-        1 - evaluate(dist, s1, s2)
+        1.0 - evaluate(dist, s1, s2)
    end
 end

-
-function compare(s1::AbstractString, s2::AbstractString,  dist::Union{Levenshtein, DamerauLevenshtein}; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing},  dist::Union{Levenshtein, DamerauLevenshtein}; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    len2 == 0 && return 1.0
@ -48,10 +51,7 @@ function compare(s1::AbstractString, s2::AbstractString,  dist::Union{Levenshtei
    end
 end

-
-
-
-@deprecate compare(dist::PreMetric, s1::AbstractString, s2::AbstractString) compare(s1, s2, dist)
+@deprecate compare(dist::PreMetric, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}) compare(s1, s2, dist)

 ##############################################################################
 ##
@ -76,7 +76,8 @@ end
 Winkler(x) = Winkler(x, 0.1, 0.7, 4)

 # hard to use min_score because of whether there is boost or not in the end
-function compare(s1::AbstractString, s2::AbstractString, dist::Winkler)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::Winkler; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    l = remove_prefix(s1, s2, dist.l)[1]
    # cannot do min_score because of boosting threshold
    score = compare(s1, s2, dist.dist)
@ -103,7 +104,8 @@ struct Partial{T <: PreMetric} <: PreMetric
    dist::T
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::Partial; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::Partial; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    len1 == len2 && return compare(s1, s2, dist.dist; min_score = min_score)
@ -117,8 +119,8 @@ function compare(s1::AbstractString, s2::AbstractString, dist::Partial; min_scor
    return out
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::Partial{RatcliffObershelp}; 
-    min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::Partial{RatcliffObershelp}; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    len1 == len2 && return compare(s1, s2, dist.dist)
@ -134,7 +136,7 @@ function compare(s1::AbstractString, s2::AbstractString, dist::Partial{RatcliffO
            s2_start += len2 - s2_end
            s2_end += len2 - s2_end
        end
-        i2_start =  nextind(s2, 0, s2_start)
+        i2_start = nextind(s2, 0, s2_start)
        i2_end = nextind(s2, 0, s2_end)
        curr = compare(s1, SubString(s2, i2_start, i2_end), RatcliffObershelp())
        out = max(out, curr)
@ -157,7 +159,8 @@ struct TokenSort{T <: PreMetric} <: PreMetric
    dist::T
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::TokenSort; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::TokenSort; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1 = join(sort!(split(s1)), " ")
    s2 = join(sort!(split(s2)), " ")
    compare(s1, s2, dist.dist; min_score = min_score)
@ -178,7 +181,8 @@ struct TokenSet{T <: PreMetric} <: PreMetric
    dist::T
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::TokenSet; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::TokenSet; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    v1 = SortedSet(split(s1))
    v2 = SortedSet(split(s2))
    v0 = intersect(v1, v2)
@ -209,7 +213,8 @@ struct TokenMax{T <: PreMetric} <: PreMetric
    dist::T
 end

-function compare(s1::AbstractString, s2::AbstractString, dist::TokenMax; min_score = 0.0)
+function compare(s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}, dist::TokenMax; min_score = 0.0)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    dist0 = compare(s1, s2, dist.dist; min_score = min_score)
@ -239,22 +244,4 @@ function compare(s1::AbstractString, s2::AbstractString, dist::TokenMax; min_sco
                            min_score = min_score / unbase_scale) 
        return max(dist0, dist1, dist2)
    end
-end
-
-
-##############################################################################
-##
-## Missing Values
-##
-##############################################################################
-
-function compare(s1::AbstractString, ::Missing, dist::PreMetric; min_score = nothing)
-    missing
-end
-function compare(::Missing, s2::AbstractString, dist::PreMetric; min_score = nothing)
-    missing
-end
-function compare(::Missing, ::Missing, dist::PreMetric; min_score = nothing)
-    missing
-end
-
+end
--- a/src/edit.jl
+++ b/src/edit.jl
@ -4,7 +4,7 @@
 ## Hamming
 ##
 ##############################################################################
-function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString)
+function evaluate(dist::Hamming, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing})
    current = abs(length(s2) - length(s1))
    for (ch1, ch2) in zip(s1, s2)
        current += ch1 != ch2
@ -12,6 +12,11 @@ function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString)
    return current
 end

+# Propagate missing: any missing operand, including both, returns `missing` instead of reaching `length`.
+evaluate(dist::Hamming, s1::Missing, s2::AbstractString) = missing
+evaluate(dist::Hamming, s1::AbstractString, s2::Missing) = missing
+evaluate(dist::Hamming, s1::Missing, s2::Missing) = missing
+
 ##############################################################################
 ##
 ## Jaro
@ -33,7 +38,8 @@ where ``m`` is the number of matching characters and
 struct Jaro <: SemiMetric end

 ## http://alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html
-function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
+function evaluate(dist::Jaro, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing})
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    # if both are empty, m = 0 so should be 1.0 according to wikipedia. Add this line so that not the case
@ -85,8 +91,7 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
            t += ch2 != iterate(s1, i1_match[i1])[1]
        end
    end
-    current = 1.0 - (m / len1 + m / len2 + (m - t/2) / m) / 3.0
-    return current
+    return 1.0 - (m / len1 + m / len2 + (m - t/2) / m) / 3.0
 end

 ##############################################################################
@ -108,8 +113,8 @@ The Levenshtein distance is the minimum number of operations (consisting of inse
 struct Levenshtein <: SemiMetric end

 ## Source: http://blog.softwx.net/2014/12/optimizing-levenshtein-algorithm-in-c.html
-function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString;
-    max_dist = nothing)
+function evaluate(dist::Levenshtein, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}; max_dist = nothing)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1
@ -163,8 +168,8 @@ The DamerauLevenshtein distance is the minimum number of operations (consisting
 struct DamerauLevenshtein <: SemiMetric end

 ## http://blog.softwx.net/2015/01/optimizing-damerau-levenshtein_15.html
-function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString;
-    max_dist = nothing)
+function evaluate(dist::DamerauLevenshtein, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing}; max_dist = nothing)
+    (ismissing(s1) | ismissing(s2)) && return missing
    s1, s2 = reorder(s1, s2)
    len1, len2 = length(s1), length(s2)
    max_dist !== nothing && len2 - len1 > max_dist && return max_dist + 1
@ -250,7 +255,8 @@ The distance between two strings is defined as one minus  the number of matching
 """
 struct RatcliffObershelp <: PreMetric end

-function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString)
+function evaluate(dist::RatcliffObershelp, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing})
+    (ismissing(s1) | ismissing(s2)) && return missing
    n_matched = sum(last.(matching_blocks(s1, s2)))
    len1, len2 = length(s1), length(s2)
    len1 + len2 == 0 ? 0. : 1.0 - 2 *  n_matched / (len1 + len2)
--- a/src/find.jl
+++ b/src/find.jl
@ -18,7 +18,6 @@ function find_best(s1::AbstractString, iter_s2, dist::PreMetric; min_score = 0.0
 end


-
 """
    find_all(s1::AbstractString, iter, dist::PreMetric; min_score = 0.8)
 `find_all` returns the vector with all the elements of `iter` that have a similarity score higher than `min_score` according to the distance `dist`. 
--- a/src/qgram.jl
+++ b/src/qgram.jl
@ -32,7 +32,7 @@ Return an iterator that iterates on the QGram of the string
 ```julia
 using StringDistances
 for x in qgram("hello", 2)
-	@show x
+	println(x)
 end
 ```
 """
@ -131,7 +131,8 @@ end
 ##############################################################################
 abstract type AbstractQGramDistance <: SemiMetric end

-function evaluate(dist::AbstractQGramDistance, s1::AbstractString, s2::AbstractString)
+function evaluate(dist::AbstractQGramDistance, s1::Union{AbstractString, Missing}, s2::Union{AbstractString, Missing})
+	(ismissing(s1) | ismissing(s2)) && return missing
 	x = count_map(qgram(s1, dist.q), qgram(s2, dist.q))
 	evaluate(dist, x)
 end
@ -141,11 +142,6 @@ end
 ## q-gram 
 ##
 ##############################################################################
-"""
-For an AbstractString s, denote v(s) the vector on the space of q-grams of length N, that contains the number of times a q-gram appears in s
-The q-gram distance is ||v(s1) - v(s2)||
-"""
-
 """
 	QGram(q::Int)

--- a/src/utils.jl
+++ b/src/utils.jl
@ -5,14 +5,13 @@ struct StringWithLength{T<:AbstractString} <: AbstractString
    l::Int
 end
 string_with_length(s::AbstractString) = StringWithLength(s, length(s))
-string_with_length(s::StringWithLength) = s
 Base.length(s::StringWithLength) = s.l
 Base.iterate(s::StringWithLength, i::Integer = firstindex(s.s)) = iterate(s.s, i)
-Base.isequal(s1::StringWithLength, s2::AbstractString) = isequal(s.s1, s2)
-Base.isequal(s1::AbstractString, s2::StringWithLength) = isequal(s1, s2.s)
 Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
 Base.ncodeunits(s::StringWithLength) = ncodeunits(s.s)
 Base.isvalid(s::StringWithLength, i::Int) = isvalid(s.s, i)
+
+
 function reorder(s1::AbstractString, s2::AbstractString)
    s1 = string_with_length(s1)
    s2 = string_with_length(s2)
--- a/test/distances.jl
+++ b/test/distances.jl
@ -3,147 +3,162 @@ using StringDistances, Test

@testset "Distances" begin

-@testset "Levenshtein" begin
-	@test evaluate(Levenshtein(), "", "") == 0
-	@test evaluate(Levenshtein(), "abc", "") == 3
-	@test evaluate(Levenshtein(), "", "abc") == 3
-	@test evaluate(Levenshtein(), "bc", "abc") == 1
-	@test evaluate(Levenshtein(), "kitten", "sitting") == 3
-	@test evaluate(Levenshtein(), "saturday", "sunday") == 3
-	@test evaluate(Levenshtein(), "hi, my name is", "my name is") == 4
-	@test evaluate(Levenshtein(), "alborgów", "amoniak") == 6
-	@test result_type(Levenshtein(), "hello", "world") == Int
-	@inferred evaluate(Levenshtein(), "", "")
-end
+	@testset "Levenshtein" begin
+		@test evaluate(Levenshtein(), "", "") == 0
+		@test evaluate(Levenshtein(), "abc", "") == 3
+		@test evaluate(Levenshtein(), "", "abc") == 3
+		@test evaluate(Levenshtein(), "bc", "abc") == 1
+		@test evaluate(Levenshtein(), "kitten", "sitting") == 3
+		@test evaluate(Levenshtein(), "saturday", "sunday") == 3
+		@test evaluate(Levenshtein(), "hi, my name is", "my name is") == 4
+		@test evaluate(Levenshtein(), "alborgów", "amoniak") == 6
+		@test result_type(Levenshtein(), "hello", "world") == Int
+		@test ismissing(evaluate(Levenshtein(), "", missing))
+		@inferred evaluate(Levenshtein(), "", "")
+	end

-@testset "DamerauLevenshtein" begin
-	@test evaluate(DamerauLevenshtein(), "", "") == 0
-	@test evaluate(DamerauLevenshtein(), "abc", "") == 3
-	@test evaluate(DamerauLevenshtein(), "bc", "abc") == 1
-	@test evaluate(DamerauLevenshtein(), "fuor", "four") == 1
-	@test evaluate(DamerauLevenshtein(), "abcd", "acb") == 2
-	@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
-	@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
-	@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
-	@test result_type(DamerauLevenshtein(), "hello", "world") == Int
-	@inferred evaluate(DamerauLevenshtein(), "", "")
-end
+	@testset "DamerauLevenshtein" begin
+		@test evaluate(DamerauLevenshtein(), "", "") == 0
+		@test evaluate(DamerauLevenshtein(), "abc", "") == 3
+		@test evaluate(DamerauLevenshtein(), "bc", "abc") == 1
+		@test evaluate(DamerauLevenshtein(), "fuor", "four") == 1
+		@test evaluate(DamerauLevenshtein(), "abcd", "acb") == 2
+		@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
+		@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
+		@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
+		@test result_type(DamerauLevenshtein(), "hello", "world") == Int
+		@test ismissing(evaluate(DamerauLevenshtein(), "", missing))
+		@inferred evaluate(DamerauLevenshtein(), "", "")
+	end

-@testset "Hamming" begin
-	@test evaluate(Hamming(), "", "") == 0
-	@test evaluate(Hamming(), "", "abc") == 3
-	@test evaluate(Hamming(), "abc", "abc") == 0
-	@test evaluate(Hamming(), "acc", "abc") == 1
-	@test evaluate(Hamming(), "abcd", "abc") == 1
-	@test evaluate(Hamming(), "abc", "abcd") == 1
-	@test evaluate(Hamming(), "testing", "this is a test") == 13
-	@test evaluate(Hamming(), "saturday", "sunday") == 7
-	@test result_type(Hamming(), "hello", "world") == Int
-	@inferred evaluate(Hamming(), "", "")
-end
+	@testset "Hamming" begin
+		@test evaluate(Hamming(), "", "") == 0
+		@test evaluate(Hamming(), "", "abc") == 3
+		@test evaluate(Hamming(), "abc", "abc") == 0
+		@test evaluate(Hamming(), "acc", "abc") == 1
+		@test evaluate(Hamming(), "abcd", "abc") == 1
+		@test evaluate(Hamming(), "abc", "abcd") == 1
+		@test evaluate(Hamming(), "testing", "this is a test") == 13
+		@test evaluate(Hamming(), "saturday", "sunday") == 7
+		@test result_type(Hamming(), "hello", "world") == Int
+		@test ismissing(evaluate(Hamming(), "", missing))
+		@inferred evaluate(Hamming(), "", "")
+	end

-@testset "QGram" begin
-	@test evaluate(QGram(1), "abc", "abc") == 0
-	@test evaluate(QGram(1), "", "abc") == 3
-	@test evaluate(QGram(1), "abc", "cba") == 0
-	@test evaluate(QGram(1), "abc", "ccc") == 4
-	@test result_type(QGram(1), "hello", "world") == Int
-	@inferred evaluate(QGram(1), "", "")
-end
+	@testset "QGram" begin
+		@test evaluate(QGram(1), "abc", "abc") == 0
+		@test evaluate(QGram(1), "", "abc") == 3
+		@test evaluate(QGram(1), "abc", "cba") == 0
+		@test evaluate(QGram(1), "abc", "ccc") == 4
+		@test evaluate(QGram(4), "aü☃", "aüaüafs") == 4
+		@test evaluate( QGram(2), SubString("aü☃", 1, 4), SubString("aüaüafs", 1, 4)) == 2
+		@test result_type(QGram(1), "hello", "world") == Int
+		@test ismissing(evaluate(QGram(1), "", missing))
+		@inferred evaluate(QGram(1), "", "")
+	end

-@testset "Cosine" begin
-	@test isnan(evaluate(Cosine(2), "", "abc"))
-	@test evaluate(Cosine(2), "abc", "ccc") ≈ 1 atol = 1e-4
-	@test evaluate(Cosine(2), "leia", "leela") ≈ 0.7113249 atol = 1e-4
-	@test result_type(Cosine(2), "hello", "world") == typeof(float(1))
-	@inferred evaluate(Cosine(2), "", "")
-	@inferred evaluate(Cosine(2), "abc", "ccc")
-end
+	@testset "Cosine" begin
+		@test isnan(evaluate(Cosine(2), "", "abc"))
+		@test evaluate(Cosine(2), "abc", "ccc") ≈ 1 atol = 1e-4
+		@test evaluate(Cosine(2), "leia", "leela") ≈ 0.7113249 atol = 1e-4
+		@test result_type(Cosine(2), "hello", "world") == typeof(float(1))
+		@test ismissing(evaluate(Cosine(2), "", missing))
+		@inferred evaluate(Cosine(2), "", "")
+	end

-@testset "Jaccard" begin
-	@test evaluate(Jaccard(1), "", "abc") ≈ 1.0
-	@test evaluate(Jaccard(1), "abc", "ccc") ≈ .666666 atol = 1e-4
-	@test evaluate(Jaccard(2), "leia", "leela") ≈ 0.83333 atol = 1e-4
-	@test result_type(Jaccard(1), "hello", "world") == typeof(float(1))
-	@inferred evaluate(Jaccard(1), "", "")
-end
+	@testset "Jaccard" begin
+		@test evaluate(Jaccard(1), "", "abc") ≈ 1.0
+		@test evaluate(Jaccard(1), "abc", "ccc") ≈ .666666 atol = 1e-4
+		@test evaluate(Jaccard(2), "leia", "leela") ≈ 0.83333 atol = 1e-4
+		@test result_type(Jaccard(1), "hello", "world") == typeof(float(1))
+		@test ismissing(evaluate(Jaccard(1), "", missing))
+		@inferred evaluate(Jaccard(1), "", "")
+	end

-@testset "SorensenDice" begin
-	@test evaluate(SorensenDice(1), "night", "nacht") ≈ 0.4 atol = 1e-4
-	@test evaluate(SorensenDice(2), "night", "nacht") ≈ 0.75 atol = 1e-4
-	@test result_type(SorensenDice(1), "hello", "world") == typeof(float(1))
-	@inferred evaluate(SorensenDice(1), "", "")
-end
+	@testset "SorensenDice" begin
+		@test evaluate(SorensenDice(1), "night", "nacht") ≈ 0.4 atol = 1e-4
+		@test evaluate(SorensenDice(2), "night", "nacht") ≈ 0.75 atol = 1e-4
+		@test result_type(SorensenDice(1), "hello", "world") == typeof(float(1))
+		@test ismissing(evaluate(SorensenDice(1), "", missing))
+		@inferred evaluate(SorensenDice(1), "", "")
+	end

-@testset "Overlap" begin
-	@test evaluate(Overlap(1), "night", "nacht") ≈ 0.4 atol = 1e-4
-	@test evaluate(Overlap(1), "context", "contact") ≈ .2 atol = 1e-4
-	@test result_type(Overlap(1), "hello", "world") == typeof(float(1))
-	@inferred evaluate(Overlap(1), "", "")
-end
+	@testset "Overlap" begin
+		@test evaluate(Overlap(1), "night", "nacht") ≈ 0.4 atol = 1e-4
+		@test evaluate(Overlap(1), "context", "contact") ≈ .2 atol = 1e-4
+		@test result_type(Overlap(1), "hello", "world") == typeof(float(1))
+		@test ismissing(evaluate(Overlap(1), "", missing))
+		@inferred evaluate(Overlap(1), "", "")
+	end

-@testset "RatcliffObershelp" begin
-	@test evaluate(RatcliffObershelp(), "dixon", "dicksonx") ≈ 1 - 0.6153846153846154
-	@test evaluate(RatcliffObershelp(), "alexandre", "aleksander") ≈ 1 - 0.7368421052631579
-	@test evaluate(RatcliffObershelp(), "pennsylvania",  "pencilvaneya") ≈ 1 - 0.6666666666666
-	@test evaluate(RatcliffObershelp(), "",  "pencilvaneya") ≈ 1.0
-	@test evaluate(RatcliffObershelp(),"NEW YORK METS", "NEW YORK MEATS") ≈ 1 -  0.962962962963
-	@test evaluate(RatcliffObershelp(), "Yankees",  "New York Yankees") ≈ 0.3913043478260869
-	@test evaluate(RatcliffObershelp(), "New York Mets",  "New York Yankees") ≈ 0.24137931034482762
-	@test result_type(RatcliffObershelp(), "hello", "world") == typeof(float(1))
-	@inferred evaluate(RatcliffObershelp(), "", "")
-end
+	@testset "RatcliffObershelp" begin
+		@test evaluate(RatcliffObershelp(), "dixon", "dicksonx") ≈ 1 - 0.6153846153846154
+		@test evaluate(RatcliffObershelp(), "alexandre", "aleksander") ≈ 1 - 0.7368421052631579
+		@test evaluate(RatcliffObershelp(), "pennsylvania",  "pencilvaneya") ≈ 1 - 0.6666666666666
+		@test evaluate(RatcliffObershelp(), "",  "pencilvaneya") ≈ 1.0
+		@test evaluate(RatcliffObershelp(),"NEW YORK METS", "NEW YORK MEATS") ≈ 1 -  0.962962962963
+		@test evaluate(RatcliffObershelp(), "Yankees",  "New York Yankees") ≈ 0.3913043478260869
+		@test evaluate(RatcliffObershelp(), "New York Mets",  "New York Yankees") ≈ 0.24137931034482762
+		@test result_type(RatcliffObershelp(), "hello", "world") == typeof(float(1))
+		@test ismissing(evaluate(RatcliffObershelp(), "", missing))
+		@inferred evaluate(RatcliffObershelp(), "", "")
+	end

-@testset "Jaro" begin
-	@test evaluate(Jaro(), "martha", "marhta") ≈  0.05555555555555547
-	@test evaluate(Jaro(), "es an ", " vs an") ≈ 0.2777777777777777
-	@test evaluate(Jaro(), " vs an", "es an ") ≈ 0.2777777777777777
-	@test result_type(Jaro(), "hello", "world") == typeof(float(1))
-end
+	@testset "Jaro" begin
+		@test evaluate(Jaro(), "martha", "marhta") ≈  0.05555555555555547
+		@test evaluate(Jaro(), "es an ", " vs an") ≈ 0.2777777777777777
+		@test evaluate(Jaro(), " vs an", "es an ") ≈ 0.2777777777777777
+		@test result_type(Jaro(), "hello", "world") == typeof(float(1))
+	end

-strings = [
-("martha", "marhta"),
-("dwayne", "duane") ,
-("dixon", "dicksonx"),
-("william", "williams"),
-("", "foo"),
-("a", "a"),
-("abc", "xyz"),
-("abc", "ccc"),
-("kitten", "sitting"),
-("saturday", "sunday"),
-("hi, my name is", "my name is"),
-("alborgów", "amoniak"),
-("cape sand recycling ", "edith ann graham"),
-( "jellyifhs", "jellyfish"),
-("ifhs", "fish"),
-("leia", "leela"),
-]
+	strings = [
+	("martha", "marhta"),
+	("dwayne", "duane") ,
+	("dixon", "dicksonx"),
+	("william", "williams"),
+	("", "foo"),
+	("a", "a"),
+	("abc", "xyz"),
+	("abc", "ccc"),
+	("kitten", "sitting"),
+	("saturday", "sunday"),
+	("hi, my name is", "my name is"),
+	("alborgów", "amoniak"),
+	("cape sand recycling ", "edith ann graham"),
+	( "jellyifhs", "jellyfish"),
+	("ifhs", "fish"),
+	("leia", "leela"),
+	]

-solutions = ((Levenshtein(), [2  2  4  1  3  0  3  2  3  3  4  6 17  3  3  2]),
-		(DamerauLevenshtein(), [1  2  4  1  3  0  3  2  3  3  4  6 17  2  2  2]),
-		(Jaro(), [0.05555556 0.17777778 0.23333333 0.04166667 1.00000000 0.00000000 1.00000000 0.44444444 0.25396825 0.2805556 0.2285714 0.48809524 0.3916667 0.07407407 0.16666667 0.21666667]),
-		(QGram(1), [0   3   3   1 3  0   6   4   5   4   4  11  14   0   0   3]),
-		(QGram(2), [  6   7   7   1 2 0   4   4   7   8   4  13  32   8   6   5]),
-		(Jaccard(1), [0.0 0.4285714 0.3750000 0.1666667       1.0 0.0 1.0000000 0.6666667 0.5714286 0.3750000 0.2000000 0.8333333 0.5000000 0.0 0.0 0.2500000]),
-		(Jaccard(2),  [ 0.7500000 0.8750000 0.7777778 0.1428571       1.0     NaN 1.0000000 1.0000000 0.7777778 0.8000000 0.3076923 1.0000000 0.9696970 0.6666667 1.0000000 0.8333333]),
-		(Cosine(2), [0.6000000 0.7763932 0.6220355 0.0741799  NaN  NaN 1.0000000 1.0000000 0.6348516 0.6619383 0.1679497 1.0000000 0.9407651 0.5000000 1.0000000 0.7113249]))
-# Test with R package StringDist
-for x in solutions
-	t, solution = x
-	for i in 1:length(solution)
-		if isnan(evaluate(t, strings[i]...))
-			@test isnan(solution[i])
-		else
-			@test evaluate(t, strings[i]...) ≈ solution[i] atol = 1e-4
+	solutions = ((Levenshtein(), [2  2  4  1  3  0  3  2  3  3  4  6 17  3  3  2]),
+			(DamerauLevenshtein(), [1  2  4  1  3  0  3  2  3  3  4  6 17  2  2  2]),
+			(Jaro(), [0.05555556 0.17777778 0.23333333 0.04166667 1.00000000 0.00000000 1.00000000 0.44444444 0.25396825 0.2805556 0.2285714 0.48809524 0.3916667 0.07407407 0.16666667 0.21666667]),
+			(QGram(1), [0   3   3   1 3  0   6   4   5   4   4  11  14   0   0   3]),
+			(QGram(2), [  6   7   7   1 2 0   4   4   7   8   4  13  32   8   6   5]),
+			(Jaccard(1), [0.0 0.4285714 0.3750000 0.1666667       1.0 0.0 1.0000000 0.6666667 0.5714286 0.3750000 0.2000000 0.8333333 0.5000000 0.0 0.0 0.2500000]),
+			(Jaccard(2),  [ 0.7500000 0.8750000 0.7777778 0.1428571       1.0     NaN 1.0000000 1.0000000 0.7777778 0.8000000 0.3076923 1.0000000 0.9696970 0.6666667 1.0000000 0.8333333]),
+			(Cosine(2), [0.6000000 0.7763932 0.6220355 0.0741799  NaN  NaN 1.0000000 1.0000000 0.6348516 0.6619383 0.1679497 1.0000000 0.9407651 0.5000000 1.0000000 0.7113249]))
+	# Test with R package StringDist
+	for x in solutions
+		t, solution = x
+		for i in 1:length(solution)
+			if isnan(evaluate(t, strings[i]...))
+				@test isnan(solution[i])
+			else
+				@test evaluate(t, strings[i]...) ≈ solution[i] atol = 1e-4
+			end
 		end
 	end
+	# test  RatcliffObershelp
+	solution = [83, 73, 62, 93, 0, 100, 0, 33, 62, 71, 83, 27, 33, 78, 50, 67]
+	for i in eachindex(strings)
+		@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
+	end
 end




-
 #= R test
 library(stringdist)
 strings = matrix(data = c(
@ -174,13 +189,6 @@ stringdist(strings[1,], strings[2,], method = "qgram", q = 1)



-
-# test  RatcliffObershelp
-solution = [83, 73, 62, 93, 0, 100, 0, 33, 62, 71, 83, 27, 33, 78, 50, 67]
-for i in eachindex(strings)
-	@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
-end
-
 #= Fuzzywuzzy usesRatcliffObershelp  if python-Levenshtein not installed, fuzzywuzzy uses RatcliffObershelp)
 from fuzzywuzzy import fuzz
 strings = [
@ -205,4 +213,3 @@ for x in strings:
   print(fuzz.ratio(x[0], x[1]))
 =#

-end
--- a/test/modifiers.jl
+++ b/test/modifiers.jl
@ -3,170 +3,117 @@ using StringDistances, Test

@testset "Modifiers" begin

-# Compare
-@test compare("", "abc", Hamming()) ≈ 0.0 atol = 1e-4
-@test compare("acc", "abc", Hamming()) ≈ 2/3 atol = 1e-4
-@test compare("saturday", "sunday", Hamming()) ≈ 1/8 atol = 1e-4
+	# Hamming
+	@test compare("", "abc", Hamming()) ≈ 0.0 atol = 1e-4
+	@test compare("acc", "abc", Hamming()) ≈ 2/3 atol = 1e-4
+	@test compare("saturday", "sunday", Hamming()) ≈ 1/8 atol = 1e-4
+	@test compare("New York Yankees",  "Yankees", Partial(Hamming())) ≈ 1
+	@test compare("New York Yankees",  "", Partial(Hamming())) ≈ 1
+	compare("aüa", "aua", Hamming())

-@test compare("", "abc", QGram(1)) ≈ 0.0 atol = 1e-4
-@test compare("abc", "cba", QGram(1)) ≈ 1.0 atol = 1e-4
-@test compare("abc", "ccc", QGram(1)) ≈ 1/3 atol = 1e-4
+	# QGram
+	@test compare("", "abc", QGram(1)) ≈ 0.0 atol = 1e-4
+	@test compare("abc", "cba", QGram(1)) ≈ 1.0 atol = 1e-4
+	@test compare("abc", "ccc", QGram(1)) ≈ 1/3 atol = 1e-4
+	compare("aüa", "aua", TokenMax(QGram(2)))
+	@test compare("", "abc", Jaccard(2)) ≈ 0.0 atol = 1e-4
+	@test compare("martha", "martha", Jaccard(2)) ≈ 1.0 atol = 1e-4
+	@test compare("martha", "martha", Jaccard(2)) ≈ 1.0 atol = 1e-4
+	@test compare("aa", "aa ", Partial(Jaccard(2))) ≈ 1.0
+	@test compare("martha", "martha", Cosine(2)) ≈ 1.0 atol = 1e-4
+	@test compare("martha", "martha", Overlap(2)) ≈ 1.0 atol = 1e-4
+	@test compare("martha", "martha", SorensenDice(2)) ≈ 1.0 atol = 1e-4

-@test compare("", "abc", Jaccard(2)) ≈ 0.0 atol = 1e-4
+	# Jaro
+	compare("aüa", "aua", Jaro())

-@test compare("martha", "martha", Jaccard(2)) ≈ 1.0 atol = 1e-4
-@test compare("martha", "martha", Cosine(2)) ≈ 1.0 atol = 1e-4
-@test compare("martha", "martha", Jaccard(2)) ≈ 1.0 atol = 1e-4
-@test compare("martha", "martha", Overlap(2)) ≈ 1.0 atol = 1e-4
-@test compare("martha", "martha", SorensenDice(2)) ≈ 1.0 atol = 1e-4
+	# Levenshtein
+	compare("aüa", "aua", Levenshtein())
+	compare("aüa", "aua", DamerauLevenshtein())
+
+	# Winkler
+	@test compare("martha", "marhta", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.9611 atol = 1e-4
+	@test compare("dwayne", "duane", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.84 atol = 1e-4
+	@test compare("dixon", "dicksonx", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.81333 atol = 1e-4
+	@test compare("william", "williams", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.975 atol = 1e-4
+	@test compare("", "foo", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.0 atol = 1e-4
+	@test compare("a", "a", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 1.0 atol = 1e-4
+	@test compare("abc", "xyz", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.0 atol = 1e-4
+
+	# RatcliffObershelp
+	@test compare("New York Mets vs Atlanta Braves", "", RatcliffObershelp())  ≈ 0.0
+	@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", RatcliffObershelp())) == 5
+	compare("aüa", "aua", TokenMax(RatcliffObershelp()))
+
+	@test compare("New York Yankees",  "Yankees", Partial(RatcliffObershelp())) ≈ 1.0
+	@test compare("New York Yankees",  "", Partial(RatcliffObershelp())) ≈ 0.0
+	@test compare("mariners vs angels", "los angeles angels at seattle mariners", Partial(RatcliffObershelp())) ≈ 0.444444444444
+	@test compare("HSINCHUANG", "SINJHUAN", Partial(RatcliffObershelp())) ≈ 0.875
+	@test compare("HSINCHUANG", "LSINJHUANG DISTRIC", Partial(RatcliffObershelp())) ≈ 0.8
+	@test compare("HSINCHUANG", "SINJHUANG DISTRICT", Partial(RatcliffObershelp())) ≈ 0.8
+	@test compare("HSINCHUANG",  "SINJHUANG", Partial(RatcliffObershelp())) ≈ 0.8888888888888
+	@test compare("New York Mets vs Atlanta Braves", "Atlanta Braves vs New York Mets", TokenSort(RatcliffObershelp()))  ≈ 1.0
+	@test compare("mariners vs angels", "los angeles angels of anaheim at seattle mariners", TokenSet(RatcliffObershelp())) ≈ 1.0 - 0.09090909090909094
+	@test compare("New York Mets vs Atlanta Braves", "", TokenSort(RatcliffObershelp()))  ≈ 0.0
+	@test compare("mariners vs angels", "", TokenSet(RatcliffObershelp())) ≈ 0.0
+	@test compare("mariners vs angels", "los angeles angels at seattle mariners", TokenSet(Partial(RatcliffObershelp()))) ≈ 1.0
+	@test compare("mariners", "mariner", TokenMax(RatcliffObershelp())) ≈ 0.933333333333333
+	@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", RatcliffObershelp())) == 5
+	@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", Partial(RatcliffObershelp()))) == 7
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow", TokenMax(RatcliffObershelp()))) == 79
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", Partial(RatcliffObershelp()))) == 88
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSort(RatcliffObershelp()))) == 11
+	@test round(Int, 100 * compare("mariners", "are mariner playing tomorrow", RatcliffObershelp())) == 39
+	@test round(Int, 100 * compare("mariners", "are mariner playing tomorrow", Partial(RatcliffObershelp()))) == 88
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSet(RatcliffObershelp()))) == 39
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSet(Partial(RatcliffObershelp())))) == 88
+	# not exactly the same because tokenmax uses the max of rounded tokenset etc
+	@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenMax(RatcliffObershelp()))) == 52


-# Winkler
-@test compare("martha", "marhta", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.9611 atol = 1e-4
-@test compare("dwayne", "duane", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.84 atol = 1e-4
-@test compare("dixon", "dicksonx", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.81333 atol = 1e-4
-@test compare("william", "williams", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.975 atol = 1e-4
-@test compare("", "foo", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.0 atol = 1e-4
-@test compare("a", "a", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 1.0 atol = 1e-4
-@test compare("abc", "xyz", Winkler(Jaro(), 0.1, 0.0, 4)) ≈ 0.0 atol = 1e-4
+	# check missing
+	@test compare("ok", missing, Levenshtein()) === missing
+
+	# check min
+	strings = [
+	("martha", "marhta"),
+	("dwayne", "duane") ,
+	("dixon", "dicksonx"),
+	("william", "williams"),
+	("", "foo"),
+	("a", "a"),
+	("abc", "xyz"),
+	("abc", "ccc"),
+	("kitten", "sitting"),
+	("saturday", "sunday"),
+	("hi, my name is", "my name is"),
+	("alborgów", "amoniak"),
+	("cape sand recycling ", "edith ann graham"),
+	( "jellyifhs", "jellyfish"),
+	("ifhs", "fish"),
+	("leia", "leela"),
+	]
+	for dist in (Levenshtein, DamerauLevenshtein)
+		for i in eachindex(strings)
+			if compare(strings[i]..., dist()) <  1 / 3
+				@test compare(strings[i]..., dist() ; min_score = 1/ 3) ≈ 0.0
+			else
+				@test compare(strings[i]..., dist() ; min_score = 1/ 3) ≈ compare(strings[i]..., dist())
+			end
+		end
+	end
+
+	# check find_best and find_all
+	@test find_best("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == "NewYork"
+	@test find_best("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == "NewYork"
+	@test find_all("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ["NewYork"]
+	@test find_all("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ["NewYork", "Newark"]

-strings = [
-("martha", "marhta"),
-("dwayne", "duane") ,
-("dixon", "dicksonx"),
-("william", "williams"),
-("", "foo")
-]
-solutions = [0.03888889 0.16000000 0.18666667 0.02500000 1.00000000]
-for i in 1:length(solutions)
-	@test compare(strings[i]..., Winkler(Jaro(), 0.1, 0.0, 4)) ≈ (1 - solutions[i]) atol = 1e-4
 end


-
-
-
-# Partial
-@test compare("aa", "aa ", Partial(Jaccard(2))) ≈ 1.0
-
-@test compare("New York Yankees",  "Yankees", Partial(RatcliffObershelp())) ≈ 1.0
-@test compare("New York Yankees",  "", Partial(RatcliffObershelp())) ≈ 0.0
-@test compare("mariners vs angels", "los angeles angels at seattle mariners", Partial(RatcliffObershelp())) ≈ 0.444444444444
-
-
-s = "HSINCHUANG"
-@test compare(s, "SINJHUAN", Partial(RatcliffObershelp())) ≈ 0.875
-@test compare(s, "LSINJHUANG DISTRIC", Partial(RatcliffObershelp())) ≈ 0.8
-@test compare(s, "SINJHUANG DISTRICT", Partial(RatcliffObershelp())) ≈ 0.8
-@test compare(s,  "SINJHUANG", Partial(RatcliffObershelp())) ≈ 0.8888888888888
-
-@test compare("New York Yankees",  "Yankees", Partial(Hamming())) ≈ 1
-@test compare("New York Yankees",  "", Partial(Hamming())) ≈ 1
-
-
-
-# Token
-@test compare("New York Mets vs Atlanta Braves", "Atlanta Braves vs New York Mets", TokenSort(RatcliffObershelp()))  ≈ 1.0
-@test compare("mariners vs angels", "los angeles angels of anaheim at seattle mariners", TokenSet(RatcliffObershelp())) ≈ 1.0 - 0.09090909090909094
-
-
-@test compare("New York Mets vs Atlanta Braves", "", RatcliffObershelp())  ≈ 0.0
-
-
-@test compare("New York Mets vs Atlanta Braves", "", TokenSort(RatcliffObershelp()))  ≈ 0.0
-
-# ADD AGAIN
-@test compare("mariners vs angels", "", TokenSet(RatcliffObershelp())) ≈ 0.0
-
-
-
-
-@test compare("mariners", "mariner", TokenMax(RatcliffObershelp())) ≈ 0.933333333333333
-
-
-
-#@test_approx_eq compare(TokenSort(RatcliffObershelp()), graphemeiterator("New York Mets vs Atlanta Braves"), graphemeiterator("Atlanta Braves vs New York Mets"))  1.0
-#@test_approx_eq compare(TokenSet(RatcliffObershelp()),graphemeiterator("mariners vs angels"), graphemeiterator("los angeles angels of anaheim at seattle mariners")) 1.0 - 0.09090909090909094
-#@test_approx_eq compare(TokenSort(RatcliffObershelp()), graphemeiterator("New York Mets vs Atlanta Braves"), graphemeiterator(""))  0.0
-#@test_approx_eq compare(TokenSet(RatcliffObershelp()),graphemeiterator("mariners vs angels"), graphemeiterator("")) 0.0
-
-
-
-@test compare("mariners vs angels", "los angeles angels at seattle mariners", TokenSet(Partial(RatcliffObershelp()))) ≈ 1.0
-
-
-@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", RatcliffObershelp())) == 5
-
-
-# test with fuzz ratio
-@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", RatcliffObershelp())) == 5
-@test round(Int, 100 * compare("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。", Partial(RatcliffObershelp()))) == 7
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow", TokenMax(RatcliffObershelp()))) == 79
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", Partial(RatcliffObershelp()))) == 88
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSort(RatcliffObershelp()))) == 11
-@test round(Int, 100 * compare("mariners", "are mariner playing tomorrow", RatcliffObershelp())) == 39
-@test round(Int, 100 * compare("mariners", "are mariner playing tomorrow", Partial(RatcliffObershelp()))) == 88
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSet(RatcliffObershelp()))) == 39
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenSet(Partial(RatcliffObershelp())))) == 88
-# not exactly the same because tokenmax has uses the max of rounded tokenset etc
-@test round(Int, 100 * compare("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow", TokenMax(RatcliffObershelp()))) == 52
-
 #= Python code
 from fuzzywuzzy import fuzz
 fuzz.ratio("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。")
 fuzz.partial_ratio("为人子女者要堂堂正正做人，千万不可作奸犯科，致使父母蒙羞", "此前稍早些时候中国商务部发布消息称，中美经贸高级别磋商双方牵头人通话，中方就美拟9月1日加征关税进行了严正交涉。")
-fuzz.WRatio("mariners", "mariner are playing tomorrow")
-fuzz.partial_ratio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.token_sort_ratio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.token_set_ratio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.partial_token_set_ratio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.WRatio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.WRatio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-fuzz.WRatio("mariners", "mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow mariner are playing tomorrow")
-=#
-
-
-using StringDistances, Test
-
-# check with weird utf8 strings
-compare("aüa", "aua", TokenMax(RatcliffObershelp()))
-compare("aüa", "aua", TokenMax(QGram(2)))
-compare("aüa", "aua", DamerauLevenshtein())
-compare("aüa", "aua", Hamming())
-compare("aüa", "aua", Jaro())
-compare("aüa", "aua", Levenshtein())
-
-
-s1 = "aü☃"
-s2 = "aüaüafs"
-dist = QGram(4)
-@test evaluate(dist, s1, s2) == 4
-
-# check Substrings work
-s1 = SubString(s1, 1, 4)
-s2 = SubString(s2, 1, 4)
-dist = QGram(2)
-@test evaluate(dist, s1, s2) == 2
-
-
-# check missing
-@test compare(s1, missing, Levenshtein()) === missing
-
-# check min
-for dist in (Levenshtein, DamerauLevenshtein)
-	for i in eachindex(strings)
-		if compare(strings[i]..., dist()) <  1 / 3
-			@test compare(strings[i]..., dist() ; min_score = 1/ 3) ≈ 0.0
-			else
-			@test compare(strings[i]..., dist() ; min_score = 1/ 3) ≈ compare(strings[i]..., dist())
-		end
-	end
-end
-
-# check find_best and find_all
-@test find_best("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == "NewYork"
-@test find_best("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == "NewYork"
-@test find_all("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ["NewYork"]
-@test find_all("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ["NewYork", "Newark"]
-
-end
+=#