compathelper/new_version/2020-10-08-17-05-17-769-1797568811
parent
6b5f858158
commit
4df4bad6af
22
src/edit.jl
22
src/edit.jl
|
@ -124,24 +124,26 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
|
||||||
v = collect(1:(len2-k))
|
v = collect(1:(len2-k))
|
||||||
w = similar(v)
|
w = similar(v)
|
||||||
if max_dist !== nothing
|
if max_dist !== nothing
|
||||||
i2_start = k + 1
|
i2_start = 1
|
||||||
i2_end = max_dist
|
i2_end = max_dist + 1
|
||||||
end
|
end
|
||||||
prevch1, prevch2 = first(s1), first(s2)
|
prevch1, prevch2 = first(s1), first(s2)
|
||||||
current = 0
|
current = 0
|
||||||
for (i1, ch1) in enumerate(s1)
|
for (i1, ch1) in enumerate(s1)
|
||||||
i1 <= k && continue
|
i1 <= k && continue
|
||||||
left = current = i1 - k - 1
|
left = i1 - k - 1
|
||||||
|
current = left + 1
|
||||||
nextTransCost = 0
|
nextTransCost = 0
|
||||||
if max_dist !== nothing
|
if max_dist !== nothing
|
||||||
i2_start += (i1 > 1 + max_dist - (len2 - len1)) ? 1 : 0
|
i2_start += (i1 - k - 1 > max_dist - (len2 - len1)) ? 1 : 0
|
||||||
i2_end += (i2_end < len2) ? 1 : 0
|
i2_end += (i2_end <= len2) ? 1 : 0
|
||||||
end
|
end
|
||||||
for (i2, ch2) in enumerate(s2)
|
for (i2, ch2) in enumerate(s2)
|
||||||
i2 <= k && continue
|
if i2 <= k
|
||||||
# no need to look beyond window of lower right diagonal - maxDistance cells
|
prevch2 = ch2
|
||||||
#lower right diag is i1 - (len2 - len1)) and the upper left diagonal + max_dist cells (upper left is i1)
|
elseif (max_dist !== nothing) && ((i2 - k < i2_start) | (i2 - k >= i2_end))
|
||||||
if (max_dist !== nothing) && ((i2 < i2_start) | (i2 > i2_end))
|
# no need to look beyond window of lower right diagonal - maxDistance cells
|
||||||
|
#lower right diag is i1 - (len2 - len1)) and the upper left diagonal + max_dist cells (upper left is i1)
|
||||||
prevch2 = ch2
|
prevch2 = ch2
|
||||||
else
|
else
|
||||||
above, current, left = current, left, v[i2 - k]
|
above, current, left = current, left, v[i2 - k]
|
||||||
|
@ -150,7 +152,7 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
|
||||||
if ch1 != ch2
|
if ch1 != ch2
|
||||||
current = min(left, current, above) + 1
|
current = min(left, current, above) + 1
|
||||||
# never happens at i2 = k + 1 because then the two previous characters were equal
|
# never happens at i2 = k + 1 because then the two previous characters were equal
|
||||||
if (i1 > 1 + k) & (i2 > 1 + k) && (ch1 == prevch2) && (prevch1 == ch2)
|
if (i1 - k > 1) & (i2 - k > 1) && (ch1 == prevch2) && (prevch1 == ch2)
|
||||||
thisTransCost += 1
|
thisTransCost += 1
|
||||||
current = min(current, thisTransCost)
|
current = min(current, thisTransCost)
|
||||||
end
|
end
|
||||||
|
|
|
@ -44,6 +44,8 @@ using StringDistances, Unicode, Test
|
||||||
@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
|
@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
|
||||||
@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
|
@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
|
||||||
@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
|
@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
|
||||||
|
@test DamerauLevenshtein()("abcdef", "abcxyf", 2) == 2
|
||||||
|
|
||||||
@test evaluate(DamerauLevenshtein(), [1, 2, 3], [1,2, 4]) == 1
|
@test evaluate(DamerauLevenshtein(), [1, 2, 3], [1,2, 4]) == 1
|
||||||
@test evaluate(DamerauLevenshtein(), graphemes("alborgów"), graphemes("amoniak")) == evaluate(DamerauLevenshtein(), "alborgów", "amoniak")
|
@test evaluate(DamerauLevenshtein(), graphemes("alborgów"), graphemes("amoniak")) == evaluate(DamerauLevenshtein(), "alborgów", "amoniak")
|
||||||
@test DamerauLevenshtein()("bc", "abc") == 1
|
@test DamerauLevenshtein()("bc", "abc") == 1
|
||||||
|
@ -161,7 +163,7 @@ using StringDistances, Unicode, Test
|
||||||
# Test with R package StringDist
|
# Test with R package StringDist
|
||||||
for x in solutions
|
for x in solutions
|
||||||
t, solution = x
|
t, solution = x
|
||||||
for i in 1:length(solution)
|
for i in eachindex(solution)
|
||||||
if isnan(evaluate(t, strings[i]...))
|
if isnan(evaluate(t, strings[i]...))
|
||||||
@test isnan(solution[i])
|
@test isnan(solution[i])
|
||||||
else
|
else
|
||||||
|
@ -174,8 +176,19 @@ using StringDistances, Unicode, Test
|
||||||
for i in eachindex(strings)
|
for i in eachindex(strings)
|
||||||
@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
|
@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# test max_dist
|
||||||
|
for i in eachindex(strings)
|
||||||
|
d = Levenshtein()(strings[i]...)
|
||||||
|
@test Levenshtein()(strings[i]..., d) == d
|
||||||
|
d = DamerauLevenshtein()(strings[i]...)
|
||||||
|
@test DamerauLevenshtein()(strings[i]..., d) == d
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
d = DamerauLevenshtein()("abcdef", "abcxyf")
|
||||||
|
@test DamerauLevenshtein()("abcdef", "abcxyf", d) == d
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#= R test
|
#= R test
|
||||||
|
|
Loading…
Reference in New Issue