StringDistances.jl/src/StringDistances.jl

58 lines
1.1 KiB
Julia
Raw Normal View History

2015-10-22 18:12:44 +02:00
module StringDistances
2019-12-12 15:38:20 +01:00
using Distances
2019-12-12 20:48:52 +01:00
include("utils.jl")
include("edit.jl")
include("qgram.jl")
2020-02-13 15:48:35 +01:00
include("normalize.jl")
2020-02-12 15:41:46 +01:00
# Union of the distance types accepted by the generic methods of this module that are
# annotated with `::StringDistance` (such as `compare` and `Distances.result_type`).
const StringDistance = Union{
    Jaro,
    Levenshtein,
    DamerauLevenshtein,
    RatcliffObershelp,
    QGramDistance,
    Winkler,
    Partial,
    TokenSort,
    TokenSet,
    TokenMax,
    Normalize
}
2020-02-13 15:44:27 +01:00
function Distances.result_type(dist::StringDistance, s1, s2)
    # `s1` and `s2` are not inspected: the result type is read off a trial evaluation
    # of `dist` on two empty strings.
    sample = dist("", "")
    return typeof(sample)
end
2020-02-13 15:48:35 +01:00
"""
    compare(s1, s2, dist; min_score = 0.0)

Return a similarity score between 0 and 1 for the strings `s1` and
`s2` based on the distance `dist`.

The score is one minus the normalized distance. The keyword `min_score`
(default `0.0`) is passed on to the normalized distance as `1 - min_score`.

### Examples
```julia-repl
julia> compare("martha", "marhta", Levenshtein())
0.6666666666666667
```
"""
function compare(s1, s2, dist::StringDistance; min_score = 0.0)
    normalized = normalize(dist)
    # `min_score` is expressed on the similarity scale; the normalized distance
    # receives its complement as third argument.
    bound = 1 - min_score
    # Similarity is the complement of the normalized distance.
    return 1 - normalized(s1, s2, bound)
end
2020-02-13 15:44:27 +01:00
include("find.jl")
2020-02-08 17:49:53 +01:00
2015-10-22 18:12:44 +02:00
##############################################################################
##
## Export
##
##############################################################################
2019-12-12 15:38:20 +01:00
2015-11-04 18:40:30 +01:00
export
2019-12-12 20:48:52 +01:00
StringDistance,
2015-11-04 18:40:30 +01:00
Levenshtein,
DamerauLevenshtein,
Jaro,
2019-08-17 18:57:35 +02:00
RatcliffObershelp,
2015-11-04 18:40:30 +01:00
QGram,
Cosine,
Jaccard,
2015-11-05 16:51:32 +01:00
SorensenDice,
Overlap,
2015-11-04 18:40:30 +01:00
Winkler,
Partial,
TokenSort,
2015-11-06 16:47:15 +01:00
TokenSet,
2019-08-17 18:15:14 +02:00
TokenMax,
2019-12-12 20:48:52 +01:00
evaluate,
compare,
result_type,
2020-02-09 19:42:29 +01:00
qgrams,
normalize
2015-11-06 03:03:45 +01:00
end