compathelper/new_version/2020-10-08-17-05-17-769-1797568811
parent
6b5f858158
commit
4df4bad6af
22
src/edit.jl
22
src/edit.jl
|
@ -124,24 +124,26 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
|
|||
v = collect(1:(len2-k))
|
||||
w = similar(v)
|
||||
if max_dist !== nothing
|
||||
i2_start = k + 1
|
||||
i2_end = max_dist
|
||||
i2_start = 1
|
||||
i2_end = max_dist + 1
|
||||
end
|
||||
prevch1, prevch2 = first(s1), first(s2)
|
||||
current = 0
|
||||
for (i1, ch1) in enumerate(s1)
|
||||
i1 <= k && continue
|
||||
left = current = i1 - k - 1
|
||||
left = i1 - k - 1
|
||||
current = left + 1
|
||||
nextTransCost = 0
|
||||
if max_dist !== nothing
|
||||
i2_start += (i1 > 1 + max_dist - (len2 - len1)) ? 1 : 0
|
||||
i2_end += (i2_end < len2) ? 1 : 0
|
||||
i2_start += (i1 - k - 1 > max_dist - (len2 - len1)) ? 1 : 0
|
||||
i2_end += (i2_end <= len2) ? 1 : 0
|
||||
end
|
||||
for (i2, ch2) in enumerate(s2)
|
||||
i2 <= k && continue
|
||||
# no need to look beyond the window of the lower right diagonal - max_dist cells
|
||||
# (lower right diag is i1 - (len2 - len1)) and the upper left diagonal + max_dist cells (upper left is i1)
|
||||
if (max_dist !== nothing) && ((i2 < i2_start) | (i2 > i2_end))
|
||||
if i2 <= k
|
||||
prevch2 = ch2
|
||||
elseif (max_dist !== nothing) && ((i2 - k < i2_start) | (i2 - k >= i2_end))
|
||||
# no need to look beyond the window of the lower right diagonal - max_dist cells
|
||||
# (lower right diag is i1 - (len2 - len1)) and the upper left diagonal + max_dist cells (upper left is i1)
|
||||
prevch2 = ch2
|
||||
else
|
||||
above, current, left = current, left, v[i2 - k]
|
||||
|
@ -150,7 +152,7 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
|
|||
if ch1 != ch2
|
||||
current = min(left, current, above) + 1
|
||||
# never happens at i2 = k + 1 because then the two previous characters were equal
|
||||
if (i1 > 1 + k) & (i2 > 1 + k) && (ch1 == prevch2) && (prevch1 == ch2)
|
||||
if (i1 - k > 1) & (i2 - k > 1) && (ch1 == prevch2) && (prevch1 == ch2)
|
||||
thisTransCost += 1
|
||||
current = min(current, thisTransCost)
|
||||
end
|
||||
|
|
|
@ -44,6 +44,8 @@ using StringDistances, Unicode, Test
|
|||
@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
|
||||
@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
|
||||
@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
|
||||
@test DamerauLevenshtein()("abcdef", "abcxyf", 2) == 2
|
||||
|
||||
@test evaluate(DamerauLevenshtein(), [1, 2, 3], [1,2, 4]) == 1
|
||||
@test evaluate(DamerauLevenshtein(), graphemes("alborgów"), graphemes("amoniak")) == evaluate(DamerauLevenshtein(), "alborgów", "amoniak")
|
||||
@test DamerauLevenshtein()("bc", "abc") == 1
|
||||
|
@ -161,7 +163,7 @@ using StringDistances, Unicode, Test
|
|||
# Test against results from the R package stringdist
|
||||
for x in solutions
|
||||
t, solution = x
|
||||
for i in 1:length(solution)
|
||||
for i in eachindex(solution)
|
||||
if isnan(evaluate(t, strings[i]...))
|
||||
@test isnan(solution[i])
|
||||
else
|
||||
|
@ -174,8 +176,19 @@ using StringDistances, Unicode, Test
|
|||
for i in eachindex(strings)
|
||||
@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
|
||||
end
|
||||
|
||||
# test max_dist
|
||||
for i in eachindex(strings)
|
||||
d = Levenshtein()(strings[i]...)
|
||||
@test Levenshtein()(strings[i]..., d) == d
|
||||
d = DamerauLevenshtein()(strings[i]...)
|
||||
@test DamerauLevenshtein()(strings[i]..., d) == d
|
||||
end
|
||||
end
|
||||
|
||||
d = DamerauLevenshtein()("abcdef", "abcxyf")
|
||||
@test DamerauLevenshtein()("abcdef", "abcxyf", d) == d
|
||||
|
||||
|
||||
|
||||
#= R test
|
||||
|
|
Loading…
Reference in New Issue