parent
0f6c814b26
commit
bed352eef7
|
@ -96,6 +96,15 @@ The package includes distance modifiers:
|
||||||
#> 0.855
|
#> 0.855
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Unicode
|
||||||
|
To iterate on graphemes rather than characters, use `graphemeiterator`:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
evaluate(Hamming(), "b\u0300", "a")
|
||||||
|
#> 2
|
||||||
|
evaluate(Hamming(), graphemeiterator("b\u0300"), graphemeiterator("a"))
|
||||||
|
#> 1
|
||||||
|
```
|
||||||
## References
|
## References
|
||||||
- [The stringdist Package for Approximate String Matching](https://journal.r-project.org/archive/2014-1/loo.pdf) Mark P.J. van der Loo
|
- [The stringdist Package for Approximate String Matching](https://journal.r-project.org/archive/2014-1/loo.pdf) Mark P.J. van der Loo
|
||||||
- [fuzzywuzzy blog post](http://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/)
|
- [fuzzywuzzy blog post](http://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/)
|
||||||
|
|
|
@ -32,54 +32,26 @@ TokenSort,
|
||||||
TokenSet,
|
TokenSet,
|
||||||
TokenMax,
|
TokenMax,
|
||||||
graphemeiterator
|
graphemeiterator
|
||||||
##############################################################################
|
|
||||||
##
|
|
||||||
## Define GraphemeIterator as AbstractString
|
|
||||||
##
|
|
||||||
## Argument for AbstractString inheritance:
|
|
||||||
## (i) prevind, nextind, chr2ind, are defined once start, next, done, isvalid, endof are defined
|
|
||||||
## (ii) SubString(x::GraphemeIterator, i, j) works
|
|
||||||
## (iii) I can define functions with AbstractString signature in this package (but I could also just define a union type)
|
|
||||||
## Argument for non inheritance:
|
|
||||||
## (i) All existing types <: AbstractString gives char as individual, which is important for print_escaped & search.
|
|
||||||
## (ii) How to make split return GraphemeIterator rather than strings? How to join multiple GraphemeIterator w/o rewriting join?
|
|
||||||
##
|
|
||||||
##############################################################################
|
|
||||||
# from Base. I redefine it because I want AbstractString inheritance
|
|
||||||
immutable GraphemeIterator{S<:AbstractString} <: AbstractString
|
|
||||||
s::S # original string (for generation of SubStrings)
|
|
||||||
end
|
|
||||||
graphemeiterator(s::AbstractString) = GraphemeIterator{typeof(s)}(s)
|
|
||||||
eltype{S}(::Type{GraphemeIterator{S}}) = SubString{S}
|
|
||||||
function length(g::GraphemeIterator)
|
|
||||||
c0 = Char(0x00ad) # soft hyphen (grapheme break always allowed after this)
|
|
||||||
n = 0
|
|
||||||
for c in g.s
|
|
||||||
n += isgraphemebreak(c0, c)
|
|
||||||
c0 = c
|
|
||||||
end
|
|
||||||
return n
|
|
||||||
end
|
|
||||||
start(g::GraphemeIterator) = start(g.s)
|
|
||||||
done(g::GraphemeIterator, i::Int) = done(g.s, i)
|
|
||||||
function next(g::GraphemeIterator, i::Int)
|
|
||||||
s = g.s
|
|
||||||
j = i
|
|
||||||
c0, k = next(s, i)
|
|
||||||
while !done(s, k) # loop until next grapheme is s[i:j]
|
|
||||||
c, ℓ = next(s, k)
|
|
||||||
isgraphemebreak(c0, c) && break
|
|
||||||
j = k
|
|
||||||
k = ℓ
|
|
||||||
c0 = c
|
|
||||||
end
|
|
||||||
return (SubString(s, i, j), k)
|
|
||||||
end
|
|
||||||
==(g1::GraphemeIterator, g2::GraphemeIterator) = g1.s == g2.s
|
|
||||||
hash(g::GraphemeIterator, h::UInt) = hash(g.s, h)
|
|
||||||
isless(g1::GraphemeIterator, g2::GraphemeIterator) = isless(g1.s, g2.s)
|
|
||||||
show{S}(io::IO, g::GraphemeIterator{S}) = print(io, "length-$(length(g)) GraphemeIterator{$S} for \"$(g.s)\"")
|
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
##
|
||||||
|
## TypeAlias
|
||||||
|
##
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
typealias GraphemeIterator Base.UTF8proc.GraphemeIterator
|
||||||
|
typealias AbstractStringorGraphemeIterator Union{AbstractString, Base.UTF8proc.GraphemeIterator}
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
##
|
||||||
|
## GraphemeIterator iterates on Grapheme
|
||||||
|
##
|
||||||
|
##############################################################################
|
||||||
|
Base.prevind(x::GraphemeIterator, i::Integer) = prevind(x.s, i)
|
||||||
|
Base.nextind(x::GraphemeIterator, i::Integer) = nextind(x.s, i)
|
||||||
|
Base.chr2ind(x::GraphemeIterator, i::Integer) = chr2ind(x.s, i)
|
||||||
|
Base.SubString(x::GraphemeIterator, i::Integer, j::Integer) = graphemeiterator(SubString(x.s, i::Integer, j::Integer))
|
||||||
|
graphemeiterator(s::AbstractString) = GraphemeIterator{typeof(s)}(s)
|
||||||
|
|
||||||
# added
|
# added
|
||||||
# these 2 functions allow prevind, nextind, chr2ind, etc. to be defined
|
# these 2 functions allow prevind, nextind, chr2ind, etc. to be defined
|
||||||
|
@ -91,7 +63,7 @@ function Base.isvalid(s::GraphemeIterator, i::Integer)
|
||||||
return i0 < start(s.s) || isgraphemebreak(s.s[i0], s.s[i])
|
return i0 < start(s.s) || isgraphemebreak(s.s[i0], s.s[i])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
function endof(s::GraphemeIterator)
|
function Base.endof(s::GraphemeIterator)
|
||||||
c0 = Char(0x00ad)
|
c0 = Char(0x00ad)
|
||||||
i = endof(s.s)
|
i = endof(s.s)
|
||||||
i0 = start(s.s)
|
i0 = start(s.s)
|
||||||
|
@ -128,7 +100,7 @@ include("modifiers/fuzzywuzzy.jl")
|
||||||
##############################################################################
|
##############################################################################
|
||||||
for x in (:evaluate, :compare)
|
for x in (:evaluate, :compare)
|
||||||
@eval begin
|
@eval begin
|
||||||
function $x(dist::PreMetric, s1::AbstractString, s2::AbstractString)
|
function $x(dist::PreMetric, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
if len1 > len2
|
if len1 > len2
|
||||||
return $x(dist, s2, s1, len2, len1)
|
return $x(dist, s2, s1, len2, len1)
|
||||||
|
@ -139,19 +111,20 @@ for x in (:evaluate, :compare)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
##
|
##
|
||||||
## compare
|
## compare
|
||||||
##
|
##
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
function compare(dist::PreMetric, s1::AbstractString, s2::AbstractString,
|
function compare(dist::PreMetric, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
1.0 - evaluate(dist, s1, s2, len1, len2)
|
1.0 - evaluate(dist, s1, s2, len1, len2)
|
||||||
end
|
end
|
||||||
|
|
||||||
function compare(dist::Union{Hamming, Levenshtein, DamerauLevenshtein},
|
function compare(dist::Union{Hamming, Levenshtein, DamerauLevenshtein},
|
||||||
s1::AbstractString, s2::AbstractString,
|
s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
distance = evaluate(dist, s1, s2, len1, len2)
|
distance = evaluate(dist, s1, s2, len1, len2)
|
||||||
len2 == 0 ? 1.0 : 1.0 - distance / len2
|
len2 == 0 ? 1.0 : 1.0 - distance / len2
|
||||||
|
@ -160,14 +133,14 @@ end
|
||||||
# compare always returns a value between 0 and 1.
|
# compare always returns a value between 0 and 1.
|
||||||
# When string length < q for qgram distance, returns s1 == s2
|
# When string length < q for qgram distance, returns s1 == s2
|
||||||
function compare(dist::AbstractQGram,
|
function compare(dist::AbstractQGram,
|
||||||
s1::AbstractString, s2::AbstractString,
|
s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
||||||
evaluate(dist, s1, s2, len1, len2)
|
evaluate(dist, s1, s2, len1, len2)
|
||||||
end
|
end
|
||||||
|
|
||||||
function compare(dist::QGram,
|
function compare(dist::QGram,
|
||||||
s1::AbstractString, s2::AbstractString,
|
s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
||||||
distance = evaluate(dist, s1, s2, len1, len2)
|
distance = evaluate(dist, s1, s2, len1, len2)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# Return start of common substring in s1, start of common substring in s2, and length of substring
|
# Return start of common substring in s1, start of common substring in s2, and length of substring
|
||||||
# Indexes refer to character number, not index (differ for Unicode strings)
|
# Indexes refer to character number, not index (differ for Unicode strings)
|
||||||
function longest_common_substring(s1::AbstractString, s2::AbstractString)
|
function longest_common_substring(s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator)
|
||||||
if length(s1) > length(s2)
|
if length(s1) > length(s2)
|
||||||
start2, start1, size= longest_common_substring(s2, s1)
|
start2, start1, size= longest_common_substring(s2, s1)
|
||||||
else
|
else
|
||||||
|
@ -28,7 +28,7 @@ function longest_common_substring(s1::AbstractString, s2::AbstractString)
|
||||||
return start1, start2, size
|
return start1, start2, size
|
||||||
end
|
end
|
||||||
|
|
||||||
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractString, s2::AbstractString, start1::Integer, start2::Integer)
|
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, start1::Integer, start2::Integer)
|
||||||
a = longest_common_substring(s1, s2)
|
a = longest_common_substring(s1, s2)
|
||||||
if a[3] > 0
|
if a[3] > 0
|
||||||
push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))
|
push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))
|
||||||
|
@ -43,14 +43,14 @@ function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractString, s2::
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function matching_blocks(s1::AbstractString, s2::AbstractString)
|
function matching_blocks(s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator)
|
||||||
x = Set{Tuple{Int, Int, Int}}()
|
x = Set{Tuple{Int, Int, Int}}()
|
||||||
matching_blocks!(x, s1, s2, 1, 1)
|
matching_blocks!(x, s1, s2, 1, 1)
|
||||||
return x
|
return x
|
||||||
end
|
end
|
||||||
|
|
||||||
type RatcliffObershelp <: PreMetric end
|
type RatcliffObershelp <: PreMetric end
|
||||||
function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function evaluate(dist::RatcliffObershelp, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
result = matching_blocks(s1, s2)
|
result = matching_blocks(s1, s2)
|
||||||
matched = 0
|
matched = 0
|
||||||
for x in result
|
for x in result
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
## Assumes length(s1) <= length(s2)
|
## Assumes length(s1) <= length(s2)
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1)
|
function common_prefix(s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, lim::Integer = -1)
|
||||||
start1 = start(s1)
|
start1 = start(s1)
|
||||||
start2 = start(s2)
|
start2 = start(s2)
|
||||||
l = 0
|
l = 0
|
||||||
|
@ -24,7 +24,7 @@ end
|
||||||
##
|
##
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString, len1::Integer, len2:: Integer)
|
function evaluate(dist::Hamming, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2:: Integer)
|
||||||
count = 0
|
count = 0
|
||||||
for (ch1, ch2) in zip(s1, s2)
|
for (ch1, ch2) in zip(s1, s2)
|
||||||
count += ch1 != ch2
|
count += ch1 != ch2
|
||||||
|
@ -42,7 +42,7 @@ end
|
||||||
|
|
||||||
|
|
||||||
type Levenshtein <: SemiMetric end
|
type Levenshtein <: SemiMetric end
|
||||||
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function evaluate(dist::Levenshtein, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
|
|
||||||
# prefix common to both strings can be ignored
|
# prefix common to both strings can be ignored
|
||||||
k, start1, start2 = common_prefix(s1, s2)
|
k, start1, start2 = common_prefix(s1, s2)
|
||||||
|
@ -90,7 +90,7 @@ end
|
||||||
|
|
||||||
type DamerauLevenshtein <: SemiMetric end
|
type DamerauLevenshtein <: SemiMetric end
|
||||||
|
|
||||||
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function evaluate(dist::DamerauLevenshtein, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
|
|
||||||
# prefix common to both strings can be ignored
|
# prefix common to both strings can be ignored
|
||||||
k, start1, start2 = common_prefix(s1, s2)
|
k, start1, start2 = common_prefix(s1, s2)
|
||||||
|
@ -158,7 +158,7 @@ end
|
||||||
|
|
||||||
type Jaro <: SemiMetric end
|
type Jaro <: SemiMetric end
|
||||||
|
|
||||||
function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function evaluate(dist::Jaro, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
# if len2 == 0, m = 0 so should be 1.0 according to wikipedia. Nope.
|
# if len2 == 0, m = 0 so should be 1.0 according to wikipedia. Nope.
|
||||||
len2 == 0 && return 0.0
|
len2 == 0 && return 0.0
|
||||||
|
|
||||||
|
@ -199,4 +199,4 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString, len1::Inte
|
||||||
return 1.0 - score
|
return 1.0 - score
|
||||||
end
|
end
|
||||||
|
|
||||||
jaro(s1::AbstractString, s2::AbstractString) = evaluate(Jaro(), s1, s2)
|
jaro(s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator) = evaluate(Jaro(), s1, s2)
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
##
|
##
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
immutable QGramIterator{S <: AbstractString, T <: Integer}
|
immutable QGramIterator{S <: AbstractStringorGraphemeIterator, T <: Integer}
|
||||||
s::S # grapheorstring
|
s::S # grapheme
|
||||||
l::Int # length of string
|
l::Int # length of string
|
||||||
q::T # length of q-grams
|
q::T # length of q-grams
|
||||||
end
|
end
|
||||||
|
@ -23,7 +23,8 @@ function Base.done(qgram::QGramIterator, state)
|
||||||
istart, idend = state
|
istart, idend = state
|
||||||
done(qgram.s, idend)
|
done(qgram.s, idend)
|
||||||
end
|
end
|
||||||
Base.eltype(qgram::QGramIterator) = SubString{typeof(qgram.s)}
|
Base.eltype{S <: AbstractString, T}(qgram::QGramIterator{S, T}) = SubString{typeof(qgram.s)}
|
||||||
|
Base.eltype{S <: GraphemeIterator, T}(qgram::QGramIterator{S, T}) = SubString{typeof(qgram.s.s)}
|
||||||
Base.length(qgram::QGramIterator) = max(qgram.l - qgram.q + 1, 0)
|
Base.length(qgram::QGramIterator) = max(qgram.l - qgram.q + 1, 0)
|
||||||
function Base.collect(qgram::QGramIterator)
|
function Base.collect(qgram::QGramIterator)
|
||||||
x = Array(eltype(qgram), length(qgram))
|
x = Array(eltype(qgram), length(qgram))
|
||||||
|
@ -80,7 +81,7 @@ end
|
||||||
##############################################################################
|
##############################################################################
|
||||||
abstract AbstractQGram <: SemiMetric
|
abstract AbstractQGram <: SemiMetric
|
||||||
|
|
||||||
function evaluate(dist::AbstractQGram, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function evaluate(dist::AbstractQGram, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
sort1 = sort(QGramIterator(s1, len1, dist.q))
|
sort1 = sort(QGramIterator(s1, len1, dist.q))
|
||||||
sort2 = sort(QGramIterator(s2, len2, dist.q))
|
sort2 = sort(QGramIterator(s2, len2, dist.q))
|
||||||
evaluate(dist, CountInterator(sort1, sort2))
|
evaluate(dist, CountInterator(sort1, sort2))
|
||||||
|
|
|
@ -9,7 +9,7 @@ type Partial{T <: PreMetric} <: PreMetric
|
||||||
end
|
end
|
||||||
|
|
||||||
# general
|
# general
|
||||||
function compare(dist::Partial, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function compare(dist::Partial, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
||||||
len1 == 0 && return compare(dist.dist, "", "", 0, 0)
|
len1 == 0 && return compare(dist.dist, "", "", 0, 0)
|
||||||
iter = QGramIterator(s2, len2, len1)
|
iter = QGramIterator(s2, len2, len1)
|
||||||
|
@ -26,7 +26,7 @@ end
|
||||||
|
|
||||||
# Specialization for RatcliffObershelp distance
|
# Specialization for RatcliffObershelp distance
|
||||||
# Code follows https://github.com/seatgeek/fuzzywuzzy/blob/master/fuzzywuzzy/fuzz.py
|
# Code follows https://github.com/seatgeek/fuzzywuzzy/blob/master/fuzzywuzzy/fuzz.py
|
||||||
function compare(dist::Partial{RatcliffObershelp}, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function compare(dist::Partial{RatcliffObershelp}, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
||||||
out = 0.0
|
out = 0.0
|
||||||
result = matching_blocks(s1, s2)
|
result = matching_blocks(s1, s2)
|
||||||
|
@ -59,7 +59,7 @@ type TokenSort{T <: PreMetric} <: PreMetric
|
||||||
dist::T
|
dist::T
|
||||||
end
|
end
|
||||||
|
|
||||||
function compare(dist::TokenSort, s1::AbstractString, s2::AbstractString,
|
function compare(dist::TokenSort, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
if search(s1, Base._default_delims) > 0
|
if search(s1, Base._default_delims) > 0
|
||||||
s1 = iterator(typeof(s1), join(sort!(split(s1)), " "))
|
s1 = iterator(typeof(s1), join(sort!(split(s1)), " "))
|
||||||
|
@ -81,7 +81,7 @@ type TokenSet{T <: PreMetric} <: PreMetric
|
||||||
dist::T
|
dist::T
|
||||||
end
|
end
|
||||||
|
|
||||||
function compare(dist::TokenSet, s1::AbstractString, s2::AbstractString,
|
function compare(dist::TokenSet, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
v0, v1, v2 = _separate!(split(s1), split(s2))
|
v0, v1, v2 = _separate!(split(s1), split(s2))
|
||||||
s0 = iterator(typeof(s1), join(v0, " "))
|
s0 = iterator(typeof(s1), join(v0, " "))
|
||||||
|
@ -129,7 +129,7 @@ type TokenMax{T <: PreMetric} <: PreMetric
|
||||||
dist::T
|
dist::T
|
||||||
end
|
end
|
||||||
|
|
||||||
function compare(dist::TokenMax, s1::AbstractString, s2::AbstractString,
|
function compare(dist::TokenMax, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator,
|
||||||
len1::Integer, len2::Integer)
|
len1::Integer, len2::Integer)
|
||||||
base = compare(dist.dist, s1, s2, len1, len2)
|
base = compare(dist.dist, s1, s2, len1, len2)
|
||||||
unbase_scale = 0.95
|
unbase_scale = 0.95
|
||||||
|
|
|
@ -13,7 +13,7 @@ end
|
||||||
# restrict to distance between 0 and 1
|
# restrict to distance between 0 and 1
|
||||||
Winkler(x) = Winkler(x, 0.1, 0.7)
|
Winkler(x) = Winkler(x, 0.1, 0.7)
|
||||||
|
|
||||||
function compare(dist::Winkler, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
function compare(dist::Winkler, s1::AbstractStringorGraphemeIterator, s2::AbstractStringorGraphemeIterator, len1::Integer, len2::Integer)
|
||||||
score = compare(dist.dist, s1, s2, len1, len2)
|
score = compare(dist.dist, s1, s2, len1, len2)
|
||||||
l = common_prefix(s1, s2, 4)[1]
|
l = common_prefix(s1, s2, 4)[1]
|
||||||
# common prefix adjustment
|
# common prefix adjustment
|
||||||
|
|
Loading…
Reference in New Issue