simplify logic

pull/7/head
matthieugomez 2018-07-04 15:04:06 -04:00
parent 69a008fcf8
commit 99ace7c1f0
2 changed files with 15 additions and 20 deletions

View File

@ -5,8 +5,6 @@
function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1) function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1)
# in case this loop never happens # in case this loop never happens
out1 = firstindex(s1)
out2 = firstindex(s2)
x1 = iterate(s1) x1 = iterate(s1)
x2 = iterate(s2) x2 = iterate(s2)
l = 0 l = 0
@ -14,13 +12,11 @@ function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1
ch1, state1 = x1 ch1, state1 = x1
ch2, state2 = x2 ch2, state2 = x2
ch1 != ch2 && break ch1 != ch2 && break
out1 = state1
out2 = state2
x1 = iterate(s1, state1) x1 = iterate(s1, state1)
x2 = iterate(s2, state2) x2 = iterate(s2, state2)
l += 1 l += 1
end end
return l, out1, out2 return l, x1, x2
end end
############################################################################## ##############################################################################
@ -51,8 +47,7 @@ struct Levenshtein <: SemiMetric end
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString) function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
# prefix common to both strings can be ignored # prefix common to both strings can be ignored
s2, len2, s1, len1 = reorder(s1, s2) s2, len2, s1, len1 = reorder(s1, s2)
k, start1, start2 = common_prefix(s1, s2) k, x1, x2start = common_prefix(s1, s2)
x1 = iterate(s1, start1)
(x1 == nothing) && return len2 - k (x1 == nothing) && return len2 - k
# distance initialized to first row of matrix # distance initialized to first row of matrix
# => distance between "" and s2[1:i} # => distance between "" and s2[1:i}
@ -68,7 +63,7 @@ function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
left = (i1 - 1) left = (i1 - 1)
current = (i1 - 1) current = (i1 - 1)
i2 = 0 i2 = 0
x2 = iterate(s2, start2) x2 = x2start
while x2 != nothing while x2 != nothing
i2 += 1 i2 += 1
ch2, state2 = x2 ch2, state2 = x2
@ -100,8 +95,7 @@ struct DamerauLevenshtein <: SemiMetric end
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString) function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString)
s2, len2, s1, len1 = reorder(s1, s2) s2, len2, s1, len1 = reorder(s1, s2)
# prefix common to both strings can be ignored # prefix common to both strings can be ignored
k, state1, start2 = common_prefix(s1, s2) k, x1, x2start = common_prefix(s1, s2)
x1 = iterate(s1, state1)
(x1 == nothing) && return len2 - k (x1 == nothing) && return len2 - k
v0 = Array{Int}(undef, len2 - k) v0 = Array{Int}(undef, len2 - k)
@inbounds for i2 in 1:(len2 - k) @inbounds for i2 in 1:(len2 - k)
@ -110,20 +104,18 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri
v2 = Array{Int}(undef, len2 - k) v2 = Array{Int}(undef, len2 - k)
current = 0 current = 0
i1 = 0 i1 = 0
ch1 = first(s1) prevch1, = x1
while (x1 != nothing) while (x1 != nothing)
i1 += 1 i1 += 1
prevch1 = ch1
ch1, state1 = x1 ch1, state1 = x1
x2 = iterate(s2, start2)
left = (i1 - 1) left = (i1 - 1)
current = i1 current = i1
nextTransCost = 0 nextTransCost = 0
ch2, = x2 prevch2, = x2start
x2 = x2start
i2 = 0 i2 = 0
while (x2 != nothing) while (x2 != nothing)
i2 += 1 i2 += 1
prevch2 = ch2
ch2, state2 = x2 ch2, state2 = x2
above = current above = current
thisTransCost = nextTransCost thisTransCost = nextTransCost
@ -151,8 +143,10 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri
end end
v0[i2] = current v0[i2] = current
x2 = iterate(s2, state2) x2 = iterate(s2, state2)
prevch2 = ch2
end end
x1 = iterate(s1, state1) x1 = iterate(s1, state1)
prevch1 = ch1
end end
return current return current
end end

View File

@ -47,16 +47,16 @@ end
function Base.iterate(s::CountIterator, state = (1, 1)) function Base.iterate(s::CountIterator, state = (1, 1))
state1, state2 = state state1, state2 = state
state2 > length(s.v2) && state1 > length(s.v1) && return nothing
iter1 = state2 > length(s.v2) iter1 = state2 > length(s.v2)
iter2 = state1 > length(s.v1) iter2 = state1 > length(s.v1)
iter2 && iter1 && return nothing
if iter1 if iter1
@inbounds x1 = s.v1[state1] x1 = s.v1[state1]
elseif iter2 elseif iter2
@inbounds x2 = s.v2[state2] x2 = s.v2[state2]
else else
@inbounds x1 = s.v1[state1] x1 = s.v1[state1]
@inbounds x2 = s.v2[state2] x2 = s.v2[state2]
iter1 = x1 <= x2 iter1 = x1 <= x2
iter2 = x2 <= x1 iter2 = x2 <= x1
end end
@ -66,6 +66,7 @@ function Base.iterate(s::CountIterator, state = (1, 1))
end end
############################################################################## ##############################################################################
## ##
## Distance on strings is computed by set distance on qgram sets ## Distance on strings is computed by set distance on qgram sets