From 99ace7c1f00c06c9266cbc87f3a259918f092b84 Mon Sep 17 00:00:00 2001 From: matthieugomez Date: Wed, 4 Jul 2018 15:04:06 -0400 Subject: [PATCH] simplify logic --- src/distances/edit.jl | 24 +++++++++--------------- src/distances/qgram.jl | 11 ++++++----- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/distances/edit.jl b/src/distances/edit.jl index 2150aea..1c6a606 100644 --- a/src/distances/edit.jl +++ b/src/distances/edit.jl @@ -5,8 +5,6 @@ function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1) # in case this loop never happens - out1 = firstindex(s1) - out2 = firstindex(s2) x1 = iterate(s1) x2 = iterate(s2) l = 0 @@ -14,13 +12,11 @@ function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1 ch1, state1 = x1 ch2, state2 = x2 ch1 != ch2 && break - out1 = state1 - out2 = state2 x1 = iterate(s1, state1) x2 = iterate(s2, state2) l += 1 end - return l, out1, out2 + return l, x1, x2 end ############################################################################## @@ -51,8 +47,7 @@ struct Levenshtein <: SemiMetric end function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString) # prefix common to both strings can be ignored s2, len2, s1, len1 = reorder(s1, s2) - k, start1, start2 = common_prefix(s1, s2) - x1 = iterate(s1, start1) + k, x1, x2start = common_prefix(s1, s2) (x1 == nothing) && return len2 - k # distance initialized to first row of matrix # => distance between "" and s2[1:i} @@ -68,7 +63,7 @@ function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString) left = (i1 - 1) current = (i1 - 1) i2 = 0 - x2 = iterate(s2, start2) + x2 = x2start while x2 != nothing i2 += 1 ch2, state2 = x2 @@ -100,8 +95,7 @@ struct DamerauLevenshtein <: SemiMetric end function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString) s2, len2, s1, len1 = reorder(s1, s2) # prefix common to both strings can be ignored - k, state1, start2 = common_prefix(s1, s2) - x1 = iterate(s1, state1) + k, x1, x2start = common_prefix(s1, s2) (x1 == nothing) && return len2 - k v0 = Array{Int}(undef, len2 - k) @inbounds for i2 in 1:(len2 - k) @@ -110,20 +104,18 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri v2 = Array{Int}(undef, len2 - k) current = 0 i1 = 0 - ch1 = first(s1) + prevch1, = x1 while (x1 != nothing) i1 += 1 - prevch1 = ch1 ch1, state1 = x1 - x2 = iterate(s2, start2) left = (i1 - 1) current = i1 nextTransCost = 0 - ch2, = x2 + prevch2, = x2start + x2 = x2start i2 = 0 while (x2 != nothing) i2 += 1 - prevch2 = ch2 ch2, state2 = x2 above = current thisTransCost = nextTransCost @@ -151,8 +143,10 @@ function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractStri end v0[i2] = current x2 = iterate(s2, state2) + prevch2 = ch2 end x1 = iterate(s1, state1) + prevch1 = ch1 end return current end diff --git a/src/distances/qgram.jl b/src/distances/qgram.jl index 0adc777..982c0b8 100644 --- a/src/distances/qgram.jl +++ b/src/distances/qgram.jl @@ -47,16 +47,16 @@ end function Base.iterate(s::CountIterator, state = (1, 1)) state1, state2 = state - state2 > length(s.v2) && state1 > length(s.v1) && return nothing iter1 = state2 > length(s.v2) iter2 = state1 > length(s.v1) + iter2 && iter1 && return nothing if iter1 - @inbounds x1 = s.v1[state1] + x1 = s.v1[state1] elseif iter2 - @inbounds x2 = s.v2[state2] + x2 = s.v2[state2] else - @inbounds x1 = s.v1[state1] - @inbounds x2 = s.v2[state2] + x1 = s.v1[state1] + x2 = s.v2[state2] iter1 = x1 <= x2 iter2 = x2 <= x1 end @@ -66,6 +66,7 @@ function Base.iterate(s::CountIterator, state = (1, 1)) end + ############################################################################## ## ## Distance on strings is computed by set distance on qgram sets