From e4f50dcda7f663f35af163332036258c7acb586e Mon Sep 17 00:00:00 2001
From: matthieugomez <gomez.matthieu@gmail.com>
Date: Fri, 13 Dec 2019 09:14:36 -0500
Subject: [PATCH] update

---
 README.md   | 36 ++++++++++++++++++------------------
 src/find.jl |  4 +---
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index d0fb3bf..ffddda1 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ The package is registered in the [`General`](https://github.com/JuliaRegistries/
 The function `compare` returns a similarity score between two strings. The function always returns a score between 0 and 1, with a value of 0 being completely different and a value of 1 being completely similar. Its syntax is:
 
 ```julia
-compare(::AbstractString, ::AbstractString, ::StringDistance)
+compare(s1::AbstractString, s2::AbstractString, dist::StringDistance)
 ```
 
 - Edit Distances
@@ -19,11 +19,11 @@ compare(::AbstractString, ::AbstractString, ::StringDistance)
 	- [Damerau-Levenshtein Distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) `DamerauLevenshtein()`
 	- [RatcliffObershelp Distance](https://xlinux.nist.gov/dads/HTML/ratcliffObershelp.html) `RatcliffObershelp()`
 - Q-gram distances compare the set of all substrings of length `q` in each string.
-	- QGram Distance `Qgram(q)`
-	- [Cosine Distance](https://en.wikipedia.org/wiki/Cosine_similarity) `Cosine(q)`
-	- [Jaccard Distance](https://en.wikipedia.org/wiki/Jaccard_index) `Jaccard(q)`
-	- [Overlap Distance](https://en.wikipedia.org/wiki/Overlap_coefficient) `Overlap(q)`
-	- [Sorensen-Dice Distance](https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient) `SorensenDice(q)`
+	- QGram Distance `QGram(q::Int)`
+	- [Cosine Distance](https://en.wikipedia.org/wiki/Cosine_similarity) `Cosine(q::Int)`
+	- [Jaccard Distance](https://en.wikipedia.org/wiki/Jaccard_index) `Jaccard(q::Int)`
+	- [Overlap Distance](https://en.wikipedia.org/wiki/Overlap_coefficient) `Overlap(q::Int)`
+	- [Sorensen-Dice Distance](https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient) `SorensenDice(q::Int)`
 
 - The package includes distance "modifiers", that can be applied to any distance.
 
@@ -37,26 +37,26 @@ Some examples:
 ```julia
 compare("martha", "marhta", Jaro())
 compare("martha", "marhta", Winkler(Jaro()))
-compare("william", "williams", QGram(2))
-compare("william", "williams", Winkler(QGram(2)))
-compare("New York Yankees", "Yankees", Levenshtein())
-compare("New York Yankees", "Yankees", Partial(Levenshtein()))
-compare("mariners vs angels", "los angeles angels at seattle mariners", Jaro())
-compare("mariners vs angels", "los angeles angels at seattle mariners", TokenSet(Jaro()))
-compare("mariners vs angels", "los angeles angels at seattle mariners", TokenMax(RatcliffObershelp()))
+compare("martha", "marhta", QGram(2))
+compare("martha", "marhta", Winkler(QGram(2)))
+compare("martha", "marhta", Levenshtein())
+compare("martha", "marhta", Partial(Levenshtein()))
+compare("martha", "marhta", Jaro())
+compare("martha", "marhta", TokenSet(Jaro()))
+compare("martha", "marhta", TokenMax(RatcliffObershelp()))
 ```
 
-In case the word order does not matter, a good distance is `TokenMax(Levenshtein())`
+In case the word order does not matter, a good distance is `TokenMax(Levenshtein())` (see [fuzzywuzzy](http://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/)).
 
 ## Find
-- `findmax` returns the value and index of the element in `itr` with the highest similarity score with `x`. Its syntax is:
+- `findmax` returns the value and index of the element in `itr` with the highest similarity score with `s`. Its syntax is:
 	```julia
-	findmax(x::AbstractString, itr, dist::StringDistance; min_score = 0.0)
+	findmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0)
 	```
 
-- `findall` returns the indices of all elements in `itr` with a similarity score with `x` higher than a minimum value (default to 0.8). Its syntax is:
+- `findall` returns the indices of all elements in `itr` whose similarity score with `s` is higher than a minimum value (defaults to 0.8). Its syntax is:
 	```julia
-	findall(x::AbstractString, itr, dist::StringDistance; min_score = 0.8)
+	findall(s::AbstractString, itr, dist::StringDistance; min_score = 0.8)
 	```
 
 The functions `findmax` and `findall` are particularly optimized for `Levenshtein` and `DamerauLevenshtein` distances (as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`).
diff --git a/src/find.jl b/src/find.jl
index c82cff2..2ddf485 100755
--- a/src/find.jl
+++ b/src/find.jl
@@ -11,19 +11,17 @@ The function is optimized for `Levenshtein` and `DamerauLevenshtein` distances
 function Base.findmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0)
     vmin = Threads.Atomic{typeof(min_score)}(min_score)
     vs = [0.0 for _ in 1:Threads.nthreads()]
-    xs = eltype(itr)["" for _ in 1:Threads.nthreads()]
     is = [0 for _ in 1:Threads.nthreads()]
     Threads.@threads for i in collect(keys(itr))
         v = compare(s, itr[i], dist; min_score = vmin[])
         v_old = Threads.atomic_max!(vmin, v)
         if v >= v_old
             vs[Threads.threadid()] = v
-            xs[Threads.threadid()] = itr[i]
             is[Threads.threadid()] = i
         end
     end
     i = argmax(vs)
-    is[i] == 0 ? (nothing, nothing) : (xs[i], is[i])
+    is[i] == 0 ? (nothing, nothing) : (itr[is[i]], is[i])
 end