# StringDistances.jl/benchmark/benchmark.jl
using StringDistances
# Benchmark inputs: two independent collections of 100_000 random strings,
# each string having a length drawn uniformly from 5:25.
x = [randstring(len) for len in rand(5:25, 100_000)]
y = [randstring(len) for len in rand(5:25, 100_000)]
"""
    f(out, t, x, y)

Evaluate the distance `t` on every pair `(x[i], y[i])` and return the results
in a new array with element type `out`.

# Arguments
- `out`: element type of the result array (the callers in this file pass
  `Int` or `Float64`).
- `t`: distance object, passed as the first argument to
  `StringDistances.evaluate`.
- `x`, `y`: arrays of equal length holding the values to compare pairwise.

# Throws
- `DimensionMismatch` if `x` and `y` have different lengths.
"""
function f(out, t, x, y)
    # The loop below runs under `@inbounds`, so the lengths must be validated
    # up front; otherwise `y[i]` could read out of bounds.
    length(x) == length(y) || throw(DimensionMismatch(
        "x has length $(length(x)) but y has length $(length(y))"))
    # `similar(x, out)` allocates an uninitialized array shaped like `x` with
    # element type `out`; every slot is overwritten by the loop.
    d = similar(x, out)
    @inbounds for i in eachindex(x)
        d[i] = StringDistances.evaluate(t, x[i], y[i])
    end
    # Return the results so the computed distances can be inspected.
    return d
end
# Time one full pass of `f` over the (x, y) pairs for each distance.
# Note that `@time` on the first call of `f` for a given distance type also
# includes compilation time.

# similar (presumably: timings comparable to the R `stringdist` reference in
# the block comment at the bottom of this file)
@time f(Int, Levenshtein(), x, y)
@time f(Float64, JaroWinkler(0.1, 0.7, 5), x, y)
# all 5-10x slower (presumably relative to the same R reference)
@time f(Float64, Jaccard(2), x, y)
@time f(Float64, Cosine(2), x, y)
# NOTE(review): Cosine(2) is timed a second time here; confirm whether the
# repeat is intentional (e.g. a warmed-up run) or a copy-paste slip.
@time f(Float64, Cosine(2), x, y)
@time f(Int, QGram(2), x, y)
#= R code (reference timings using the `stringdist` package)
library(stringdist)
x <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
y <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
system.time(stringdist(x,y,method='lv'))
system.time(stringdist(x,y,method='jaccard'))
system.time(stringdist(x,y,method='cosine'))
system.time(stringdist(x,y,method='qgram'))
=#