StringDistances.jl/benchmark/benchmark.jl

38 lines
933 B
Julia
Raw Normal View History

2015-10-24 00:47:43 +02:00
2019-03-29 14:04:23 +01:00
using StringDistances, Random
# Seed the global RNG so the generated benchmark inputs are reproducible between runs.
Random.seed!(2)
# Two collections of 500_000 random strings each; every length is drawn from 5:25.
x = [Random.randstring(len) for len in rand(5:25, 500_000)]
y = [Random.randstring(len) for len in rand(5:25, 500_000)]
2018-07-04 23:27:40 +02:00
"""
    f(t, x, y)

Call `evaluate(t, x[i], y[i])` for every index `i` of `x` and collect the results
into an array.

`y` is indexed with the indices of `x`, so it must be indexable at each of them.
"""
function f(t, x, y)
    return [evaluate(t, x[i], y[i]) for i in eachindex(x)]
end
2018-07-04 23:27:40 +02:00
# Author's note: same speed as the R `stringdist` package.
# Each distance is first run on a single pair so that `f` is already compiled for that
# distance type; otherwise the first `@time` for each type would also include JIT
# compilation time.
f(Levenshtein(), x[1:1], y[1:1])
@time f(Levenshtein(), x, y)
f(Jaro(), x[1:1], y[1:1])
@time f(Jaro(), x, y)
f(RatcliffObershelp(), x[1:1], y[1:1])
@time f(RatcliffObershelp(), x, y)
2015-10-24 02:32:33 +02:00
2018-07-04 23:27:40 +02:00
# Author's note: 4x slower compared to the R `stringdist` package.
# Each distance is first run on a single pair so that `f` is already compiled for that
# distance type; otherwise the first `@time` for each type would also include JIT
# compilation time.
f(Jaccard(2), x[1:1], y[1:1])
@time f(Jaccard(2), x, y)
f(Cosine(2), x[1:1], y[1:1])
@time f(Cosine(2), x, y)
f(QGram(2), x[1:1], y[1:1])
@time f(QGram(2), x, y)
2015-10-26 14:38:09 +01:00
2015-11-04 18:40:30 +01:00
#
2015-10-26 14:38:09 +01:00
2015-10-24 14:59:44 +02:00
2015-10-24 02:32:33 +02:00
2015-10-24 00:47:43 +02:00
#= R code: the corresponding benchmark using the R `stringdist` package
library(stringdist)
2018-07-04 23:27:40 +02:00
x <- sapply(sample(5:25,5 * 1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
y <- sapply(sample(5:25,5 * 1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
2015-11-02 18:54:47 +01:00
system.time(stringdist(x,y,method='lv', nthread = 1))
system.time(stringdist(x,y,method='jaccard', nthread = 1))
system.time(stringdist(x,y,method='cosine', nthread = 1))
system.time(stringdist(x,y,method='qgram', nthread = 1))
2015-10-24 00:47:43 +02:00
2015-11-02 18:52:23 +01:00
=#