findclosest

2020-09-28 14:55:18 -07:00 · 2020-09-28 14:55:18 -07:00 · e6898f5274
parent 46ae721329
commit e6898f5274
5 changed files with 19 additions and 18 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -3,6 +3,7 @@ os:
  - linux
 julia:
  - 1.0
+  - 1.5
  - nightly
 matrix:
  allow_failures:
--- a/README.md
+++ b/README.md
@ -58,9 +58,9 @@ compare("martha", "martha", Levenshtein())


 ### Find
- `findbest` returns the value and index of the element in `itr` with the highest similarity score with `s`. Its syntax is:
+- `findclosest` returns the value and index of the element in `itr` with the lowest distance to `s` (equivalently, the highest similarity score). Its syntax is:
 	```julia
-	findbest(s, itr, dist::StringDistance; min_score = 0.0)
+	findclosest(s, itr, dist::StringDistance; min_score = 0.0)
 	```

 - `findall` returns the indices of all elements in `itr` with a similarity score with `s` higher than a minimum value (defaults to 0.8). Its syntax is:
@ -68,7 +68,7 @@ compare("martha", "martha", Levenshtein())
 	findall(s, itr, dist::StringDistance; min_score = 0.8)
 	```

-The functions `findbest` and `findall` are particularly optimized for `Levenshtein` and `DamerauLevenshtein` distances (as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`).
+The functions `findclosest` and `findall` are particularly optimized for `Levenshtein` and `DamerauLevenshtein` distances (as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`).


 ## References
--- a/src/StringDistances.jl
+++ b/src/StringDistances.jl
@ -54,6 +54,6 @@ compare,
 result_type,
 qgrams,
 normalize,
-findbest
+findclosest
 end

--- a/src/find.jl
+++ b/src/find.jl
@ -1,7 +1,7 @@
 """
-    findbest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index)
+    findclosest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index)

-`findbest` returns the value and index of the element of `itr` that has the 
+`findclosest` returns the value and index of the element of `itr` that has the 
 highest similarity score with `s` according to the distance `dist`. 
 It returns `(nothing, nothing)` if none of the elements has a similarity score 
 higher than or equal to `min_score` (defaults to 0.0).
@ -14,13 +14,13 @@ It is particularly optimized for [`Levenshtein`](@ref) and [`DamerauLevenshtein`
 julia> using StringDistances
 julia> s = "Newark"
 julia> iter = ["New York", "Princeton", "San Francisco"]
-julia> findbest(s, iter, Levenshtein())
+julia> findclosest(s, iter, Levenshtein())
 ("New York", 1)
-julia> findbest(s, iter, Levenshtein(); min_score = 0.9)
+julia> findclosest(s, iter, Levenshtein(); min_score = 0.9)
 (nothing, nothing)
 ```
 """
-function findbest(s, itr, dist::StringDistance; min_score = 0.0)
+function findclosest(s, itr, dist::StringDistance; min_score = 0.0)
    min_score_atomic = Threads.Atomic{typeof(min_score)}(min_score)
    scores = [0.0 for _ in 1:Threads.nthreads()]
    is = [0 for _ in 1:Threads.nthreads()]
@ -39,8 +39,8 @@ end


 function Base.findmax(s, itr, dist::StringDistance; min_score = 0.0)
-    @warn "findmax(s, itr, dist; min_score) is deprecated. Use findbest(s, itr, dist; min_score)"
-    findbest(s, itr, dist; min_score = min_score)
+    @warn "findmax(s, itr, dist; min_score) is deprecated. Use findclosest(s, itr, dist; min_score)"
+    findclosest(s, itr, dist; min_score = min_score)
 end
 """
    findall(s, itr , dist::StringDistance; min_score = 0.8)
--- a/test/modifiers.jl
+++ b/test/modifiers.jl
@ -99,18 +99,18 @@ using StringDistances, Unicode, Test
 	end

 	# check findclosest and findall
-	@test findbest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ("NewYork", 1)
-	@test findbest("New York", ["San Francisco", "NewYork", "Newark"], Levenshtein()) == ("NewYork", 2)
-	@test findbest("New York", ["Newark", "San Francisco", "NewYork"], Levenshtein()) == ("NewYork", 3)
+	@test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == ("NewYork", 1)
+	@test findclosest("New York", ["San Francisco", "NewYork", "Newark"], Levenshtein()) == ("NewYork", 2)
+	@test findclosest("New York", ["Newark", "San Francisco", "NewYork"], Levenshtein()) == ("NewYork", 3)

-	@test findbest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein(); min_score = 0.99) == (nothing, nothing)
-	@test findbest("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ("NewYork", 1)
+	@test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein(); min_score = 0.99) == (nothing, nothing)
+	@test findclosest("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == ("NewYork", 1)
 	@test findall("New York", ["NewYork", "Newark", "San Francisco"], Levenshtein()) == [1]
 	@test findall("New York", ["NewYork", "Newark", "San Francisco"], Jaro()) == [1, 2]
 	@test findall("New York", ["NewYork", "Newark", "San Francisco"], Jaro(); min_score = 0.99) == Int[]
 	if VERSION >= v"1.2.0"
-		@test findbest("New York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == ("NewYork", 1)
-		@test findbest("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == (nothing, nothing)
+		@test findclosest("New York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == ("NewYork", 1)
+		@test findclosest("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == (nothing, nothing)
 		@test findall("New York", skipmissing(["NewYork", "Newark", missing]), Levenshtein()) == [1]
 		@test findall("New York", skipmissing(Union{AbstractString, Missing}[missing, missing]), Levenshtein()) == []
 	end