diff --git a/src/find.jl b/src/find.jl index ab15843..300c24a 100755 --- a/src/find.jl +++ b/src/find.jl @@ -1,37 +1,50 @@ """ - findmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0) + findmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0) -> (x, index) `findmax` returns the value and index of the element of `itr` that has the highest similarity score with `s` according to the distance `dist`. It returns `(nothing, nothing)` if none of the elements has a similarity score -higher or equal to `min_score` (default to 0.0) +higher or equal to `min_score` (default to 0.0). The function is optimized for `Levenshtein` and `DamerauLevenshtein` distances -(potentially modified by `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`) +(as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`). """ function Base.findmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0) - vmin = Threads.Atomic{typeof(min_score)}(min_score) - vs = [0.0 for _ in 1:Threads.nthreads()] + min_score = Threads.Atomic{typeof(min_score)}(min_score) + scores = [0.0 for _ in 1:Threads.nthreads()] is = [0 for _ in 1:Threads.nthreads()] Threads.@threads for i in collect(keys(itr)) - v = compare(s, itr[i], dist; min_score = vmin[]) - v_old = Threads.atomic_max!(vmin, v) - if v >= v_old - vs[Threads.threadid()] = v + score = compare(s, itr[i], dist; min_score = min_score[]) + score_old = Threads.atomic_max!(min_score, score) + if score >= score_old + scores[Threads.threadid()] = score is[Threads.threadid()] = i end end - imax = is[argmax(vs)] + imax = is[argmax(scores)] imax == 0 ? (nothing, nothing) : (itr[imax], imax) end +""" + argmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0) +`argmax` returns the index of the element of `itr` that has the +highest similarity score with `s` according to the distance `dist`. +It returns `nothing` if none of the elements has a similarity score +higher or equal to `min_score` (default to 0.0). +The function is optimized for `Levenshtein` and `DamerauLevenshtein` distances +(potentially modified by `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`) +""" +function Base.argmax(s::AbstractString, itr, dist::StringDistance; min_score = 0.0) + findmax(s, itr, dist; min_score = min_score)[2] +end """ findall(s::AbstractString, itr, dist::StringDistance; min_score = 0.8) `findall` returns the vector of indices for elements of `itr` that have a -similarity score higher or equal than `min_score` according to the distance `dist`. +similarity score higher or equal than `min_score` according to the distance `dist`. +If there are no such elements, return an empty array. The function is optimized for `Levenshtein` and `DamerauLevenshtein` distances -(potentially modified by `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`) +(as well as their modifications via `Partial`, `TokenSort`, `TokenSet`, or `TokenMax`). """ function Base.findall(s::AbstractString, itr, dist::StringDistance; min_score = 0.8) out = [Int[] for _ in 1:Threads.nthreads()]