add storage vector

pull/57/head
matthieugomez 2021-09-10 14:56:52 -04:00
parent 21ef95e028
commit 8979b33eb2
3 changed files with 20 additions and 13 deletions

View File

@ -26,7 +26,7 @@ end
@time f(DamerauLevenshtein(), x, y, min_score = 0.8);
# 0.08
@time f(RatcliffObershelp(), x, y);
# 0.8s
# 0.65s

View File

@ -248,25 +248,30 @@ function (dist::RatcliffObershelp)(s1, s2)
end
# Entry point for the matching-block length computation over positions
# start1..end1 of s1 and start2..end2 of s2.
# Allocates a single Int scratch vector and hands it to
# `length_matching_blocks!`, which passes the same vector down its recursive
# calls instead of allocating a fresh one at every level.
function length_matching_blocks(s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
    # One slot per position of the longer of the two ranges. The length is
    # clamped at zero so that `zeros` is never asked for a negative size when
    # both ranges are inverted (end < start - 1); `length_matching_blocks!`
    # itself returns 0 for an empty range.
    p = zeros(Int, max(end1 - start1 + 1, end2 - start2 + 1, 0))
    return length_matching_blocks!(p, s1, s2, start1, start2, end1, end2)
end
# Recursive worker behind `length_matching_blocks`.
# Returns 0 when either index range is empty (end < start) or when no common
# pattern is found; otherwise returns the length of the longest common pattern
# plus the results of recursing on the parts before and after that pattern.
# `p` is the caller-provided Int vector that is forwarded to
# `longest_common_pattern!` and to the recursive `length_matching_blocks!` calls.
# NOTE(review): this span comes from a rendered diff, so the removed lines (the
# calls without `p` / without `!`) appear next to their replacements — confirm
# which lines belong to the current version before relying on this text.
function length_matching_blocks!(p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
# an empty range on either side has nothing to match
end1 >= start1 || return 0
end2 >= start2 || return 0
j1, j2, len = longest_common_pattern(s1, s2, start1, start2, end1, end2)
# (j1, j2) is where the pattern starts in s1 / s2, len is its length
j1, j2, len = longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
# exit if there is no common substring
len == 0 && return 0
# recurse on what lies before (up to j1 - 1 / j2 - 1) and after
# (from j1 + len / j2 + len) the pattern
return len +
length_matching_blocks(s1, s2, start1, start2, j1 - 1, j2 - 1) +
length_matching_blocks(s1, s2, j1 + len, j2 + len, end1, end2)
length_matching_blocks!(p, s1, s2, start1, start2, j1 - 1, j2 - 1) +
length_matching_blocks!(p, s1, s2, j1 + len, j2 + len, end1, end2)
end
function longest_common_pattern(s1, s2, start1, start2, end1, end2)
function longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
if end1 - start1 > end2 - start2
j2, j1, len = longest_common_pattern(s2, s1, start2, start1, end2, end1)
j2, j1, len = longest_common_pattern!(p, s2, s1, start2, start1, end2, end1)
else
j1, j2, len = 0, 0, 0
fill!(p, 0)
# p[i2-start2+1] stores the starting index of the longest
# common pattern up to i2 with prevch1 as last matching character
p = zeros(Int, end2 - start2 + 1)
for (i1, ch1) in enumerate(s1)
i1 >= start1 || continue
i1 <= end1 || break

View File

@ -56,19 +56,21 @@ function (dist::Partial{RatcliffObershelp})(s1, s2)
end
# Entry point for collecting matching blocks over positions start1..end1 of s1
# and start2..end2 of s2.
# Creates an empty Set{Tuple{Int, Int, Int}} to receive the blocks and an Int
# scratch vector sized from the longer of the two ranges, then delegates to
# `matching_blocks!`.
# NOTE(review): this span comes from a rendered diff, so the removed one-line
# call (without `p`) appears next to its replacement — confirm which lines
# belong to the current version.
function matching_blocks(s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
matching_blocks!(Set{Tuple{Int, Int, Int}}(), s1, s2, start1, start2, end1, end2)
x = Set{Tuple{Int, Int, Int}}()
# p is the storage vector handed down to matching_blocks!
p = zeros(Int, max(end1 - start1, end2 - start2) + 1)
matching_blocks!(x, p, s1, s2, start1, start2, end1, end2)
end
# Recursive worker behind `matching_blocks`.
# Finds the longest common pattern in the given ranges, pushes it into `x` as
# the tuple (j1, j2, len), then recurses on the parts before and after it.
# Returns `x`; returns it unchanged when no common pattern is found.
# `p` is the caller-provided Int vector forwarded to `longest_common_pattern!`
# and to the recursive calls.
# NOTE(review): this span comes from a rendered diff, so the removed signature
# and calls (without `p`) appear next to their replacements — confirm which
# lines belong to the current version.
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
j1, j2, len = longest_common_pattern(s1, s2, start1, start2, end1, end2)
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
j1, j2, len = longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
# exit if there is no common substring
len == 0 && return x
# add the info of the common to the existing set
push!(x, (j1, j2, len))
# add the longest common substring that happens before
matching_blocks!(x, s1, s2, start1, start2, j1 - 1, j2 - 1)
matching_blocks!(x, p, s1, s2, start1, start2, j1 - 1, j2 - 1)
# add the longest common substring that happens after
matching_blocks!(x, s1, s2, j1 + len, j2 + len, end1, end2)
matching_blocks!(x, p, s1, s2, j1 + len, j2 + len, end1, end2)
return x
end