2015-10-24 00:47:43 +02:00
|
|
|
|
2015-11-02 18:52:23 +01:00
|
|
|
using Random

using DataStructures, StringDistances
|
2015-10-24 00:47:43 +02:00
|
|
|
|
|
|
|
# Benchmark inputs: two vectors of 100_000 random strings, each 5 to 25 characters long.
# A locally seeded RNG makes the inputs identical from run to run, so timings can be
# compared between runs, and it does so without reseeding the global RNG.
x, y = let rng = MersenneTwister(1234)
    # Draw all `n` lengths first, then one random string per length.
    randstrings(n) = [randstring(rng, len) for len in rand(rng, 5:25, n)]
    (randstrings(100_000), randstrings(100_000))
end
|
|
|
|
"""
    f(out, t, x, y)

Call `evaluate(t, x[i], y[i])` for every index `i` of `x` and collect the results in a
`Vector{out}`, which is returned.

# Arguments
- `out`: element type of the result vector; each result is stored as an `out`.
- `t`: the distance object passed as first argument to `evaluate`.
- `x`, `y`: indexable collections; `y` must have at least `length(x)` elements.

# Throws
- `DimensionMismatch` if `y` has fewer elements than `x`.
"""
function f(out::Type{T}, t, x, y) where {T}
    n = length(x)
    # The loop below runs under `@inbounds`, so make sure `y[i]` exists for every `i`.
    length(y) >= n ||
        throw(DimensionMismatch("y has $(length(y)) elements, but x has $n"))
    # `Array(out, n)` was removed in Julia 1.0; `undef` requests uninitialized storage,
    # and every slot is written by the loop before the vector is returned.
    d = Vector{T}(undef, n)
    @inbounds for i in 1:n
        d[i] = evaluate(t, x[i], y[i])
    end
    return d
end
|
|
|
|
|
2015-10-24 02:32:33 +02:00
|
|
|
# Benchmarks: time each distance over all (x[i], y[i]) pairs. Each entry pairs the
# element type of the result vector with the distance object to benchmark.
#
# Every distance is first run on a small slice so that the time reported by `@time`
# excludes JIT compilation for that distance type; the first call of a method otherwise
# includes its compile time.
for (out, dist) in (
    # similar (presumably: timing comparable to R's stringdist; see the R code below)
    (Int, Levenshtein()),
    (Float64, Jaro()),
    # 2x slower compared to R's stringdist
    (Int, QGram(2)),
    (Float64, Cosine(2)),
    (Float64, Jaccard(2)),
    # no comparison recorded; the R code below does not time a counterpart
    (Float64, RatcliffObershelp()),
)
    f(out, dist, x[1:min(end, 10)], y[1:min(end, 10)])  # warm-up, not timed
    @time f(out, dist, x, y)
end
|
|
|
|
|
2015-10-26 14:38:09 +01:00
|
|
|
|
|
|
|
|
2015-10-24 14:59:44 +02:00
|
|
|
|
2015-10-24 02:32:33 +02:00
|
|
|
|
|
|
|
|
2015-10-24 00:47:43 +02:00
|
|
|
#= R code: the same benchmark using R's stringdist package, for comparison
|
|
|
|
library(stringdist)
|
|
|
|
x <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
|
|
|
|
y <- sapply(sample(5:25,1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
|
2015-11-02 18:54:47 +01:00
|
|
|
system.time(stringdist(x,y,method='lv', nthread = 1))
|
|
|
|
system.time(stringdist(x,y,method='jaccard', nthread = 1))
|
|
|
|
system.time(stringdist(x,y,method='cosine', nthread = 1))
|
|
|
|
system.time(stringdist(x,y,method='qgram', nthread = 1))
|
2015-10-24 00:47:43 +02:00
|
|
|
|
2015-11-02 18:52:23 +01:00
|
|
|
=#
|
|
|
|
|