diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl index c89887c..79d2d2d 100644 --- a/benchmark/benchmark.jl +++ b/benchmark/benchmark.jl @@ -11,6 +11,7 @@ end @time f(Hamming(), x, y) @time f(Jaro(), x, y) @time f(Jaro(), x, y; min_dist = 0.9) +@time f(Winkler(Jaro()), x, y; min_dist = 0.9) @time f(Levenshtein(), x, y) # 0.3s. A bit faster than StringDist diff --git a/src/edit.jl b/src/edit.jl index 31b1358..9a48c47 100755 --- a/src/edit.jl +++ b/src/edit.jl @@ -109,8 +109,8 @@ struct Levenshtein <: SemiMetric end ## Source: http://blog.softwx.net/2014/12/optimizing-levenshtein-algorithm-in-c.html function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString; max_dist = max(length(s1), length(s2))) -s1, s2 = reorder(s1, s2) -len1, len2 = length(s1), length(s2) + s1, s2 = reorder(s1, s2) + len1, len2 = length(s1), length(s2) len2 - len1 >= max_dist && return max_dist # prefix common to both strings can be ignored k, x1, x2start = remove_prefix(s1, s2)