pull/23/head
matthieugomez 2020-02-08 12:00:44 -05:00
parent 39ad98deb3
commit 30fb47e9bf
2 changed files with 10 additions and 9 deletions

View File

@ -7,8 +7,9 @@ import Distances: evaluate, result_type
include("utils.jl") include("utils.jl")
include("edit.jl") include("edit.jl")
include("qgram.jl") include("qgram.jl")
const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance}
include("compare.jl") include("compare.jl")
const StringDistance = Union{Jaro, Levenshtein, DamerauLevenshtein, RatcliffObershelp, QGramDistance, Winkler, Partial, TokenSort, TokenSet, TokenMax}
include("find.jl") include("find.jl")
############################################################################## ##############################################################################

View File

@ -1,8 +1,8 @@
""" """
compare(s1::AbstractString, s2::AbstractString, dist::StringDistance) compare(s1, s2, dist)
return a similarity score between 0 and 1 for the strings `s1` and return a similarity score between 0 and 1 for the strings `s1` and
`s2` based on the string distance `dist`. `s2` based on the distance `dist`.
### Examples ### Examples
```julia-repl ```julia-repl
@ -38,7 +38,7 @@ function compare(s1, s2, dist::QGramDistance; min_score = 0.0)
end end
""" """
Winkler(dist::StringDistance; p::Real = 0.1, threshold::Real = 0.7, maxlength::Integer = 4) Winkler(dist; p::Real = 0.1, threshold::Real = 0.7, maxlength::Integer = 4)
Creates the `Winkler{dist, p, threshold, maxlength}` distance Creates the `Winkler{dist, p, threshold, maxlength}` distance
@ -54,7 +54,7 @@ struct Winkler{S <: SemiMetric} <: SemiMetric
maxlength::Integer # max length of common prefix. Default to 4 maxlength::Integer # max length of common prefix. Default to 4
end end
function Winkler(dist::StringDistance; p = 0.1, threshold = 0.7, maxlength = 4) function Winkler(dist; p = 0.1, threshold = 0.7, maxlength = 4)
p * maxlength <= 1 || throw("scaling factor times maxlength of common prefix must be lower than one") p * maxlength <= 1 || throw("scaling factor times maxlength of common prefix must be lower than one")
Winkler(dist, 0.1, 0.7, 4) Winkler(dist, 0.1, 0.7, 4)
end end
@ -71,7 +71,7 @@ end
""" """
Partial(dist::StringDistance) Partial(dist)
Creates the `Partial{dist}` distance Creates the `Partial{dist}` distance
@ -129,7 +129,7 @@ function compare(s1::AbstractString, s2::AbstractString, dist::Partial{RatcliffO
end end
""" """
TokenSort(dist::StringDistance) TokenSort(dist)
Creates the `TokenSort{dist}` distance Creates the `TokenSort{dist}` distance
@ -158,7 +158,7 @@ end
""" """
TokenSet(dist::StringDistance) TokenSet(dist)
Creates the `TokenSet{dist}` distance Creates the `TokenSet{dist}` distance
@ -196,7 +196,7 @@ end
""" """
TokenMax(dist::StringDistance) TokenMax(dist)
Creates the `TokenMax{dist}` distance Creates the `TokenMax{dist}` distance