0.7 first

pull/7/head
matthieugomez 2018-07-04 12:07:26 -04:00
parent aa83b273a0
commit 2389f6f178
7 changed files with 37 additions and 35 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
benchmark/benchmark.md
PC25

View File

@ -1,3 +1,3 @@
julia 0.6
julia 0.7-
Distances
IterTools

View File

@ -96,8 +96,8 @@ function compare(dist::Partial{RatcliffObershelp}, s1::AbstractString, s2::Abstr
s2_start += len2 - s2_end
s2_end += len2 - s2_end
end
i2_start = chr2ind(s2, s2_start)
i2_end = s2_end == len2 ? endof(s2) : (chr2ind(s2, s2_end + 1) - 1)
i2_start = nextind(s2, 0, s2_start)
i2_end = s2_end == len2 ? endof(s2) : (nextind(s2, 0, s2_end + 1) - 1)
curr = compare(RatcliffObershelp(), s1, SubString(s2, i2_start, i2_end))
out = max(out, curr)
end

View File

@ -4,12 +4,12 @@
##############################################################################
function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1)
start1 = start(s1)
start2 = start(s2)
start1 = firstindex(s1)
start2 = firstindex(s2)
l = 0
while !done(s1, start1) && !done(s2, start2) && (l < lim || lim < 0)
ch1, nextstart1 = next(s1, start1)
ch2, nextstart2 = next(s2, start2)
while (start1 <= ncodeunits(s1)) && (start2 <= ncodeunits(s2)) && (l < lim || lim < 0)
ch1, nextstart1 = iterate(s1, start1)
ch2, nextstart2 = iterate(s2, start2)
ch1 != ch2 && break
l += 1
start1, start2 = nextstart1, nextstart2
@ -46,27 +46,27 @@ function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
# prefix common to both strings can be ignored
k, start1, start2 = common_prefix(s1, s2)
s2, len2, s1, len1 = reorder(s1, s2)
done(s1, start1) && return len2 - k
(start1 > ncodeunits(s1)) && return len2 - k
# distance initialized to first row of matrix
# => distance between "" and s2[1:i]
v0 = Array{Int}(len2 - k)
v0 = Array{Int}(undef, len2 - k)
@inbounds for i2 in 1:(len2 - k)
v0[i2] = i2
end
current = zero(0)
state1 = start1
i1 = 0
while !done(s1, state1)
while state1 <= ncodeunits(s1)
i1 += 1
ch1, state1 = next(s1, state1)
ch1, state1 = iterate(s1, i1)
left = (i1 - 1)
current = (i1 - 1)
state2 = start2
i2 = 0
while !done(s2, state2)
while state2 <= ncodeunits(s1)
i2 += 1
ch2, state2 = next(s2, state2)
ch2, state2 = iterate(s2, state2)
# update
above, current, left = current, left, v0[i2]
if ch1 != ch2
@ -95,32 +95,32 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri
# prefix common to both strings can be ignored
k, start1, start2 = common_prefix(s1, s2)
s2, len2, s1, len1 = reorder(s1, s2)
done(s1, start1) && return len2 - k
(start1 > ncodeunits(s1)) && return len2 - k
v0 = Array{Int}(len2 - k)
v0 = Array{Int}(undef, len2 - k)
@inbounds for i2 in 1:(len2 - k)
v0[i2] = i2
end
v2 = Array{Int}(len2 - k)
v2 = Array{Int}(undef, len2 - k)
ch1, = next(s1, start1)
ch1, = iterate(s1, start1)
current = 0
state1 = start1
i1 = 0
while !done(s1, state1)
while state1 <= ncodeunits(s1)
i1 += 1
prevch1 = ch1
ch1, state1 = next(s1, state1)
ch2, = next(s2, start2)
ch1, state1 = iterate(s1, i1)
ch2, = iterate(s2, start2)
left = (i1 - 1)
current = i1
nextTransCost = 0
state2 = start2
i2 = 0
while !done(s2, state2)
while state2 <= ncodeunits(s2)
i2 += 1
prevch2 = ch2
ch2, state2 = next(s2, state2)
ch2, state2 = iterate(s2, state2)
above = current
thisTransCost = nextTransCost
nextTransCost = v2[i2]
@ -168,21 +168,21 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
m = 0
flag = fill(false, len2)
i1 = 0
state1 = start(s1)
startstate2 = start(s2)
state1 = firstindex(s1)
startstate2 = firstindex(s2)
starti2 = 0
i1_match = fill!(Array{typeof(state1)}(len1), state1)
while !done(s1, state1)
ch1, newstate1 = next(s1, state1)
i1_match = fill!(Array{typeof(state1)}(undef, len1), state1)
while state1 <= ncodeunits(s1)
ch1, newstate1 = iterate(s1, i1)
i1 += 1
if starti2 < i1 - maxdist - 1
startstate2 = nextind(s2, startstate2)
startstate2 = iterate(s2, startstate2)
starti2 += 1
end
i2 = starti2
state2 = startstate2
while !done(s2, state2) && i2 <= i1 + maxdist
ch2, state2 = next(s2, state2)
while state2 <= len2 && i2 <= i1 + maxdist
ch2, state2 = iterate(s2, state2)
i2 += 1
if ch1 == ch2 && !flag[i2]
m += 1
@ -201,7 +201,7 @@ function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString)
i2 += 1
if flag[i2]
i1 += 1
t += ch2 != next(s1, i1_match[i1])[1]
t += ch2 != iterate(s1, i1_match[i1])[1]
end
end
m == 0.0 && return 1.0

View File

@ -1,5 +1,5 @@
using StringDistances, Base.Test
using StringDistances, Test
@test evaluate(Levenshtein(), "", "") == 0

View File

@ -1,5 +1,5 @@
using StringDistances, Base.Test
using StringDistances, Test
# Compare
@test compare(Hamming(), "", "abc") ≈ 0.0 atol = 1e-4

View File

@ -10,7 +10,7 @@ for test in tests
println("\t\033[1m\033[32mPASSED\033[0m: $(test)")
catch e
println("\t\033[1m\033[31mFAILED\033[0m: $(test)")
showerror(STDOUT, e, backtrace())
showerror(stdout, e, backtrace())
rethrow(e)
end
end