diff --git a/.travis.yml b/.travis.yml index f74e2c9..45353e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ os: - linux julia: - 1.0 + - 1.5 - nightly matrix: allow_failures: diff --git a/README.md b/README.md index 7b0548d..02d763a 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,9 @@ compare("martha", "martha", Levenshtein()) ### Find -- `findbest` returns the value and index of the element in `itr` with the highest similarity score with `s`. Its syntax is: +- `findclosest` returns the value and index of the element in `itr` with the lowest distance to `s`. Its syntax is: ```julia - findbest(s, itr, dist::StringDistance; min_score = 0.0) + findclosest(s, itr, dist::StringDistance; min_score = 0.0) ``` - `findall` returns the indices of all elements in `itr` with a similarity score with `s` higher than a minimum value (default to 0.8). Its syntax is: @@ -68,7 +68,7 @@ compare("martha", "martha", Levenshtein()) findall(s, itr, dist::StringDistance; min_score = 0.8) ``` -The functions `findbest` and `findall` are particularly optimized for `Levenshtein` and `DamerauLevenshtein` distances (as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`). +The functions `findclosest` and `findall` are particularly optimized for `Levenshtein` and `DamerauLevenshtein` distances (as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`). 
## References diff --git a/src/StringDistances.jl b/src/StringDistances.jl index 1c0423f..31e1261 100755 --- a/src/StringDistances.jl +++ b/src/StringDistances.jl @@ -54,6 +54,6 @@ compare, result_type, qgrams, normalize, -findbest +findclosest end diff --git a/src/find.jl b/src/find.jl index db5353f..16b97ea 100755 --- a/src/find.jl +++ b/src/find.jl @@ -1,7 +1,7 @@ """ - findbest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index) + findclosest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index) -`findbest` returns the value and index of the element of `itr` that has the +`findclosest` returns the value and index of the element of `itr` that has the highest similarity score with `s` according to the distance `dist`. It returns `(nothing, nothing)` if none of the elements has a similarity score higher or equal to `min_score` (default to 0.0). @@ -14,13 +14,13 @@ It is particularly optimized for [`Levenshtein`](@ref) and [`DamerauLevenshtein` julia> using StringDistances julia> s = "Newark" julia> iter = ["New York", "Princeton", "San Francisco"] -julia> findbest(s, iter, Levenshtein()) +julia> findclosest(s, iter, Levenshtein()) ("NewYork", 1) -julia> findbest(s, iter, Levenshtein(); min_score = 0.9) +julia> findclosest(s, iter, Levenshtein(); min_score = 0.9) (nothing, nothing) ``` """ -function findbest(s, itr, dist::StringDistance; min_score = 0.0) +function findclosest(s, itr, dist::StringDistance; min_score = 0.0) min_score_atomic = Threads.Atomic{typeof(min_score)}(min_score) scores = [0.0 for _ in 1:Threads.nthreads()] is = [0 for _ in 1:Threads.nthreads()] @@ -39,8 +39,8 @@ end function Base.findmax(s, itr, dist::StringDistance; min_score = 0.0) - @warn "findmax(s, itr, dist; min_score) is deprecated. Use findbest(s, itr, dist; min_score)" - findbest(s, itr, dist; min_score = min_score) + @warn "findmax(s, itr, dist; min_score) is deprecated. 
Use findclosest(s, itr, dist; min_score)" + findclosest(s, itr, dist; min_score = min_score) end """ findall(s, itr , dist::StringDistance; min_score = 0.8) diff --git a/test/modifiers.jl b/test/modifiers.jl index e110365..c09f8a7 100644 --- a/test/modifiers.jl +++ b/test/modifiers.jl @@ -99,18 +99,18 @@ using StringDistances, Unicode, Test end # check find_best and find_all - @test findbest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ("NewYork", 1) - @test findbest("New York", ["San Francisco", "NewYork", "Newark"], Levenshtein()) == ("NewYork", 2) - @test findbest("New York", ["Newark", "San Francisco", "NewYork"], Levenshtein()) == ("NewYork", 3) + @test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ("NewYork", 1) + @test findclosest("New York", ["San Francisco", "NewYork", "Newark"], Levenshtein()) == ("NewYork", 2) + @test findclosest("New York", ["Newark", "San Francisco", "NewYork"], Levenshtein()) == ("NewYork", 3) - @test findbest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein(); min_score = 0.99) == (nothing, nothing) - @test findbest("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ("NewYork", 1) + @test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein(); min_score = 0.99) == (nothing, nothing) + @test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ("NewYork", 1) @test findall("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == [1] @test findall("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == [1, 2] @test findall("New York", ["NewYork", "Newark", "San Francisco"], Jaro(); min_score = 0.99) == Int[] if VERSION >= v"1.2.0" - @test findbest("New York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == ("NewYork", 1) - @test findbest("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == (nothing, nothing) + @test findclosest("New 
York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == ("NewYork", 1) + @test findclosest("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == (nothing, nothing) @test findall("New York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == [1] @test findall("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == [] end