pull/1/head
matthieugomez 2015-10-23 20:32:33 -04:00
parent 73f80c8271
commit 0de5a28922
2 changed files with 13 additions and 9 deletions

View File

@ -10,20 +10,24 @@ function f(out, t, x, y)
end
end
# Time each distance through `f` on `x` and `y`; the equivalent R `stringdist`
# calls are kept in the block comment below for comparison.
# I get 0.12 vs 0.10 in stringdist
# http://www.markvanderloo.eu/yaRb/2013/09/07/a-bit-of-benchmarking-with-string-distances/
# similar
@time f(Int, Levenshtein(), x, y)
@time f(Float64, JaroWinkler(0.1, 0.7, 5), x, y)
# all 5-10x slower
@time f(Float64, Jaccard(2), x, y)
# NOTE(review): Cosine is timed twice in a row -- confirm the repeat is intentional.
@time f(Float64, Cosine(2), x, y)
@time f(Float64, Cosine(2), x, y)
# Arguments must be comma-separated: `f(Int QGram(2), ...)` does not parse.
@time f(Int, QGram(2), x, y)
#= R code
library(stringdist)
x <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
y <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
stringdist(x,y,method='lv')
stringdist(x,y,method='jaccard')
stringdist(x,y,method='jaccard')
stringdist(x,y,method='cosine')
system.time(stringdist(x,y,method='lv'))
system.time(stringdist(x,y,method='jaccard'))
system.time(stringdist(x,y,method='cosine'))
system.time(stringdist(x,y,method='qgram'))
=#

View File

@ -115,15 +115,15 @@ function evaluate{T}(dist::Jaccard, s1::T, s2::T)
length(s1) > length(s2) && return evaluate(dist, s2, s1)
length(s2) == 0 && return 0.0
n2 = length(s2) - dist.q + 1
n1 = length(s1) - dist.q + 1
set2 = Set{T}()
n2 = length(s2) - dist.q + 1
@inbounds for i2 in 1:n2
push!(set2, s2[i2:(i2 + dist.q - 1)])
end
set1 = Set{T}()
n1 = length(s1) - dist.q + 1
@inbounds for i1 in 1:n1
push!(set1, s1[i1:(i1 + dist.q - 1)])
end