simplify Ratcliff
parent
283ce87ef2
commit
d80071590b
30
src/edit.jl
30
src/edit.jl
|
@ -224,51 +224,47 @@ struct RatcliffObershelp <: SemiMetric end
|
||||||
|
|
||||||
function (dist::RatcliffObershelp)(s1, s2)
|
function (dist::RatcliffObershelp)(s1, s2)
|
||||||
((s1 === missing) | (s2 === missing)) && return missing
|
((s1 === missing) | (s2 === missing)) && return missing
|
||||||
|
s1, s2 = reorder(s1, s2)
|
||||||
n_matched = sum(last.(matching_blocks(s1, s2)))
|
n_matched = sum(last.(matching_blocks(s1, s2)))
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
len1 + len2 == 0 ? 0. : 1.0 - 2 * n_matched / (len1 + len2)
|
len1 + len2 == 0 ? 0. : 1.0 - 2 * n_matched / (len1 + len2)
|
||||||
end
|
end
|
||||||
|
|
||||||
function matching_blocks(s1, s2)
|
function matching_blocks(s1, s2)
|
||||||
matching_blocks!(Set{Tuple{Int, Int, Int}}(), s1, s2, length(s1), length(s2), 1, 1)
|
matching_blocks!(Set{Tuple{Int, Int, Int}}(), s1, s2, 1, 1)
|
||||||
end
|
end
|
||||||
|
|
||||||
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1, s2,
|
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1, s2, start1::Integer, start2::Integer)
|
||||||
len1::Integer, len2::Integer, start1::Integer, start2::Integer)
|
a = longest_common_pattern(s1, s2)
|
||||||
a = longest_common_pattern(s1, s2, len1 , len2)
|
|
||||||
# exit if there is no common substring
|
# exit if there is no common substring
|
||||||
a[3] == 0 && return x
|
a[3] == 0 && return x
|
||||||
# add the info of the common to the existing set
|
# add the info of the common to the existing set
|
||||||
push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))
|
push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))
|
||||||
# add the longest common substring that happens before
|
# add the longest common substring that happens before
|
||||||
s1before = _take(s1, a[1] - 1)
|
matching_blocks!(x, _take(s1, a[1] - 1), _take(s2, a[2] - 1), start1, start2)
|
||||||
s2before = _take(s2, a[2] - 1)
|
|
||||||
matching_blocks!(x, s1before, s2before, a[1] - 1, a[2] - 1, start1, start2)
|
|
||||||
# add the longest common substring that happens after
|
# add the longest common substring that happens after
|
||||||
s1after = _drop(s1, a[1] + a[3] - 1)
|
matching_blocks!(x, _drop(s1, a[1] + a[3] - 1), _drop(s2, a[2] + a[3] - 1),
|
||||||
s2after = _drop(s2, a[2] + a[3] - 1)
|
start1 + a[1] + a[3] - 1, start2 + a[2] + a[3] - 1)
|
||||||
matching_blocks!(x, s1after, s2after, len1 - (a[1] + a[3]) + 1,
|
|
||||||
len2 - (a[2] + a[3]) + 1, start1 + a[1] + a[3] - 1, start2 + a[2] + a[3] - 1)
|
|
||||||
return x
|
return x
|
||||||
end
|
end
|
||||||
|
|
||||||
function longest_common_pattern(s1, s2, len1::Integer, len2::Integer)
|
function longest_common_pattern(s1, s2)
|
||||||
if len1 > len2
|
if length(s1) > length(s2)
|
||||||
start2, start1, len = longest_common_pattern(s2, s1, len2, len1)
|
start2, start1, len = longest_common_pattern(s2, s1)
|
||||||
else
|
else
|
||||||
start1, start2, len = 0, 0, 0
|
start1, start2, len = 0, 0, 0
|
||||||
p = zeros(Int, len2)
|
p = zeros(Int, length(s2))
|
||||||
i1 = 0
|
i1 = 0
|
||||||
for ch1 in s1
|
for ch1 in s1
|
||||||
i1 += 1
|
i1 += 1
|
||||||
i2 = 0
|
|
||||||
oldp = 0
|
oldp = 0
|
||||||
|
i2 = 0
|
||||||
for ch2 in s2
|
for ch2 in s2
|
||||||
i2 += 1
|
i2 += 1
|
||||||
newp = 0
|
newp = 0
|
||||||
if ch1 == ch2
|
if ch1 == ch2
|
||||||
newp = oldp > 0 ? oldp : i2
|
newp = oldp > 0 ? oldp : i2
|
||||||
currentlength = (i2 - newp + 1)
|
currentlength = i2 - newp + 1
|
||||||
if currentlength > len
|
if currentlength > len
|
||||||
start1, start2, len = i1 - currentlength + 1, newp, currentlength
|
start1, start2, len = i1 - currentlength + 1, newp, currentlength
|
||||||
end
|
end
|
||||||
|
|
|
@ -63,14 +63,14 @@ function _take(s, n::Integer)
|
||||||
Base.Iterators.take(s, n)
|
Base.Iterators.take(s, n)
|
||||||
end
|
end
|
||||||
function _take(s::AbstractString, n::Integer)
|
function _take(s::AbstractString, n::Integer)
|
||||||
SubString(s, firstindex(s), nextind(s, 0, n))
|
StringWithLength(SubString(s, firstindex(s), nextind(s, 0, n)), n)
|
||||||
end
|
end
|
||||||
|
|
||||||
function _drop(s, n::Integer)
|
function _drop(s, n::Integer)
|
||||||
Base.Iterators.drop(s, n)
|
Base.Iterators.drop(s, n)
|
||||||
end
|
end
|
||||||
function _drop(s::AbstractString, n::Integer)
|
function _drop(s::AbstractString, n::Integer)
|
||||||
SubString(s, nextind(s, 0, n + 1), lastindex(s))
|
StringWithLength(SubString(s, nextind(s, 0, n + 1), lastindex(s)), length(s) - n)
|
||||||
end
|
end
|
||||||
|
|
||||||
function _slice(s, n1::Integer, n2::Integer)
|
function _slice(s, n1::Integer, n2::Integer)
|
||||||
|
|
Loading…
Reference in New Issue