findmax -> findnearest
parent
b2399a0ef7
commit
ac783773ba
|
@ -1,6 +1,6 @@
|
||||||
name = "StringDistances"
|
name = "StringDistances"
|
||||||
uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
|
uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
|
||||||
version = "0.8"
|
version = "0.9.0"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
|
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
|
||||||
|
|
|
@ -2,31 +2,17 @@ module StringDistances
|
||||||
|
|
||||||
using Distances
|
using Distances
|
||||||
|
|
||||||
include("utils.jl")
|
include("distances/utils.jl")
|
||||||
include("edit.jl")
|
include("distances/edit.jl")
|
||||||
include("qgram.jl")
|
include("distances/qgram.jl")
|
||||||
include("normalize.jl")
|
include("normalize.jl")
|
||||||
|
|
||||||
const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize}
|
const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax, Normalize}
|
||||||
# Distances API
|
# Distances API
|
||||||
Distances.result_type(dist::StringDistance, s1, s2) = typeof(dist("", ""))
|
Distances.result_type(dist::StringDistance, s1, s2) = typeof(dist("", ""))
|
||||||
|
|
||||||
"""
|
|
||||||
compare(s1, s2, dist)
|
|
||||||
|
|
||||||
return a similarity score between 0 and 1 for the strings `s1` and
|
|
||||||
`s2` based on the distance `dist`.
|
|
||||||
|
|
||||||
### Examples
|
|
||||||
```julia-repl
|
|
||||||
julia> compare("martha", "marhta", Levenshtein())
|
|
||||||
0.6666666666666667
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
compare(s1, s2, dist::StringDistance; min_score = 0.0) = 1 - normalize(dist)(s1, s2, 1 - min_score)
|
|
||||||
|
|
||||||
include("find.jl")
|
include("find.jl")
|
||||||
|
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
##
|
##
|
||||||
## Export
|
## Export
|
||||||
|
|
15
src/find.jl
15
src/find.jl
|
@ -1,3 +1,18 @@
|
||||||
|
"""
|
||||||
|
compare(s1, s2, dist)
|
||||||
|
|
||||||
|
return a similarity score between 0 and 1 for the strings `s1` and
|
||||||
|
`s2` based on the distance `dist`.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
```julia-repl
|
||||||
|
julia> compare("martha", "marhta", Levenshtein())
|
||||||
|
0.6666666666666667
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
compare(s1, s2, dist::StringDistance; min_score = 0.0) = 1 - normalize(dist)(s1, s2, 1 - min_score)
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
findnearest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index)
|
findnearest(s, itr, dist::StringDistance; min_score = 0.0) -> (x, index)
|
||||||
|
|
||||||
|
|
|
@ -259,4 +259,6 @@ function (dist::Winkler)(s1, s2, max_dist = 1.0)
|
||||||
out -= min(l, dist.maxlength) * dist.p * out
|
out -= min(l, dist.maxlength) * dist.p * out
|
||||||
end
|
end
|
||||||
out > max_dist ? 1.0 : out
|
out > max_dist ? 1.0 : out
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue