0.7 first
parent
aa83b273a0
commit
2389f6f178
|
@ -0,0 +1,2 @@
|
|||
benchmark/benchmark.md
|
||||
PC25
|
|
@ -96,8 +96,8 @@ function compare(dist::Partial{RatcliffObershelp}, s1::AbstractString, s2::Abstr
|
|||
s2_start += len2 - s2_end
|
||||
s2_end += len2 - s2_end
|
||||
end
|
||||
i2_start = chr2ind(s2, s2_start)
|
||||
i2_end = s2_end == len2 ? endof(s2) : (chr2ind(s2, s2_end + 1) - 1)
|
||||
i2_start = nextind(s2, 0, s2_start)
|
||||
i2_end = s2_end == len2 ? endof(s2) : (nextind(s2, 0, s2_end + 1) - 1)
|
||||
curr = compare(RatcliffObershelp(), s1, SubString(s2, i2_start, i2_end))
|
||||
out = max(out, curr)
|
||||
end
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
##############################################################################
|
||||
|
||||
function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1)
|
||||
start1 = start(s1)
|
||||
start2 = start(s2)
|
||||
start1 = firstindex(s1)
|
||||
start2 = firstindex(s2)
|
||||
l = 0
|
||||
while !done(s1, start1) && !done(s2, start2) && (l < lim || lim < 0)
|
||||
ch1, nextstart1 = next(s1, start1)
|
||||
ch2, nextstart2 = next(s2, start2)
|
||||
while (start1 <= ncodeunits(s1)) && (start2 <= ncodeunits(s2)) && (l < lim || lim < 0)
|
||||
ch1, nextstart1 = iterate(s1, start1)
|
||||
ch2, nextstart2 = iterate(s2, start2)
|
||||
ch1 != ch2 && break
|
||||
l += 1
|
||||
start1, start2 = nextstart1, nextstart2
|
||||
|
@ -46,27 +46,27 @@ function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
|
|||
# prefix common to both strings can be ignored
|
||||
k, start1, start2 = common_prefix(s1, s2)
|
||||
s2, len2, s1, len1 = reorder(s1, s2)
|
||||
done(s1, start1) && return len2 - k
|
||||
(start1 > ncodeunits(s1)) && return len2 - k
|
||||
|
||||
# distance initialized to first row of matrix
|
||||
# => distance between "" and s2[1:i]
|
||||
v0 = Array{Int}(len2 - k)
|
||||
v0 = Array{Int}(undef, len2 - k)
|
||||
@inbounds for i2 in 1:(len2 - k)
|
||||
v0[i2] = i2
|
||||
end
|
||||
current = zero(0)
|
||||
state1 = start1
|
||||
i1 = 0
|
||||
while !done(s1, state1)
|
||||
while state1 <= ncodeunits(s1)
|
||||
i1 += 1
|
||||
ch1, state1 = next(s1, state1)
|
||||
ch1, state1 = iterate(s1, i1)
|
||||
left = (i1 - 1)
|
||||
current = (i1 - 1)
|
||||
state2 = start2
|
||||
i2 = 0
|
||||
while !done(s2, state2)
|
||||
while state2 <= ncodeunits(s1)
|
||||
i2 += 1
|
||||
ch2, state2 = next(s2, state2)
|
||||
ch2, state2 = iterate(s2, state2)
|
||||
# update
|
||||
above, current, left = current, left, v0[i2]
|
||||
if ch1 != ch2
|
||||
|
@ -95,32 +95,32 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri
|
|||
# prefix common to both strings can be ignored
|
||||
k, start1, start2 = common_prefix(s1, s2)
|
||||
s2, len2, s1, len1 = reorder(s1, s2)
|
||||
done(s1, start1) && return len2 - k
|
||||
(start1 > ncodeunits(s1)) && return len2 - k
|
||||
|
||||
v0 = Array{Int}(len2 - k)
|
||||
v0 = Array{Int}(undef, len2 - k)
|
||||
@inbounds for i2 in 1:(len2 - k)
|
||||
v0[i2] = i2
|
||||
end
|
||||
v2 = Array{Int}(len2 - k)
|
||||
v2 = Array{Int}(undef, len2 - k)
|
||||
|
||||
ch1, = next(s1, start1)
|
||||
ch1, = iterate(s1, start1)
|
||||
current = 0
|
||||
state1 = start1
|
||||
i1 = 0
|
||||
while !done(s1, state1)
|
||||
while state1 <= ncodeunits(s1)
|
||||
i1 += 1
|
||||
prevch1 = ch1
|
||||
ch1, state1 = next(s1, state1)
|
||||
ch2, = next(s2, start2)
|
||||
ch1, state1 = iterate(s1, i1)
|
||||
ch2, = iterate(s2, start2)
|
||||
left = (i1 - 1)
|
||||
current = i1
|
||||
nextTransCost = 0
|
||||
state2 = start2
|
||||
i2 = 0
|
||||
while !done(s2, state2)
|
||||
while state2 <= ncodeunits(s2)
|
||||
i2 += 1
|
||||
prevch2 = ch2
|
||||
ch2, state2 = next(s2, state2)
|
||||
ch2, state2 = iterate(s2, state2)
|
||||
above = current
|
||||
thisTransCost = nextTransCost
|
||||
nextTransCost = v2[i2]
|
||||
|
@ -168,21 +168,21 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
|
|||
m = 0
|
||||
flag = fill(false, len2)
|
||||
i1 = 0
|
||||
state1 = start(s1)
|
||||
startstate2 = start(s2)
|
||||
state1 = firstindex(s1)
|
||||
startstate2 = firstindex(s2)
|
||||
starti2 = 0
|
||||
i1_match = fill!(Array{typeof(state1)}(len1), state1)
|
||||
while !done(s1, state1)
|
||||
ch1, newstate1 = next(s1, state1)
|
||||
i1_match = fill!(Array{typeof(state1)}(undef, len1), state1)
|
||||
while state1 <= ncodeunits(s1)
|
||||
ch1, newstate1 = iterate(s1, i1)
|
||||
i1 += 1
|
||||
if starti2 < i1 - maxdist - 1
|
||||
startstate2 = nextind(s2, startstate2)
|
||||
startstate2 = iterate(s2, startstate2)
|
||||
starti2 += 1
|
||||
end
|
||||
i2 = starti2
|
||||
state2 = startstate2
|
||||
while !done(s2, state2) && i2 <= i1 + maxdist
|
||||
ch2, state2 = next(s2, state2)
|
||||
while state2 <= len2 && i2 <= i1 + maxdist
|
||||
ch2, state2 = iterate(s2, state2)
|
||||
i2 += 1
|
||||
if ch1 == ch2 && !flag[i2]
|
||||
m += 1
|
||||
|
@ -201,7 +201,7 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
|
|||
i2 += 1
|
||||
if flag[i2]
|
||||
i1 += 1
|
||||
t += ch2 != next(s1, i1_match[i1])[1]
|
||||
t += ch2 != iterate(s1, i1_match[i1])[1]
|
||||
end
|
||||
end
|
||||
m == 0.0 && return 1.0
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
using StringDistances, Base.Test
|
||||
using StringDistances, Test
|
||||
|
||||
|
||||
@test evaluate(Levenshtein(), "", "") == 0
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
using StringDistances, Base.Test
|
||||
using StringDistances, Test
|
||||
|
||||
# Compare
|
||||
@test compare(Hamming(), "", "abc") ≈ 0.0 atol = 1e-4
|
||||
|
|
|
@ -10,7 +10,7 @@ for test in tests
|
|||
println("\t\033[1m\033[32mPASSED\033[0m: $(test)")
|
||||
catch e
|
||||
println("\t\033[1m\033[31mFAILED\033[0m: $(test)")
|
||||
showerror(STDOUT, e, backtrace())
|
||||
showerror(stdout, e, backtrace())
|
||||
rethrow(e)
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue