Delete benchmark.jl
parent
164448f5d7
commit
0c1443310c
|
@ -1,81 +0,0 @@
|
|||
|
||||
using StringDistances, Random
|
||||
# Seed the global RNG so the two string vectors below are reproducible.
Random.seed!(2)
# Two vectors of 500_000 random strings each; every length is drawn from 5:25.
x = [Random.randstring(len) for len in rand(5:25, 500_000)]
y = [Random.randstring(len) for len in rand(5:25, 500_000)]
|
||||
|
||||
"""
    f(t, x, y; min_score = 0.0)

Return an array whose entry at index `i` is
`compare(x[i], y[i], t; min_score = min_score)`, for every index `i` of `x`.

# Arguments
- `t`: passed unchanged as the third positional argument of `compare`.
- `x`, `y`: indexable collections; `y` must be indexable at every index of `x`.

# Keywords
- `min_score`: forwarded to `compare` (default `0.0`).
"""
function f(t, x, y; min_score = 0.0)
    # `eachindex(x)` rather than `1:length(x)`, so containers whose indices do
    # not start at 1 are traversed correctly.
    return [compare(x[i], y[i], t; min_score = min_score) for i in eachindex(x)]
end
|
||||
|
||||
# Time the element-wise `compare` over `x` and `y` (through `f`) for several
# distances, with and without a `min_score` threshold.
# The comment after each call is the author's recorded timing — presumably in
# seconds; machine and Julia version are not recorded here.
# NOTE(review): the first `@time` of each method also measures compilation.
@time f(Jaro(), x, y)
|
||||
# 0.3s
|
||||
@time f(Levenshtein(), x, y)
|
||||
# 0.35s. A bit faster than StringDist (presumably the R `stringdist` package
# timed in the R code at the end of this file — confirm).
|
||||
@time f(Levenshtein(), x, y, min_score = 0.8)
|
||||
# 0.11s
|
||||
@time f(DamerauLevenshtein(), x, y)
|
||||
# 0.45s. Much faster than StringDist (presumably the R `stringdist` package).
|
||||
@time f(DamerauLevenshtein(), x, y, min_score = 0.8)
|
||||
# 0.08s
|
||||
|
||||
|
||||
|
||||
# Time `findmax` with the first string of `x` as the query, the whole of `y` as
# the collection, and each edit distance in turn.
# NOTE(review): presumably this is the StringDistances method that searches `y`
# for the best match of `x[1]` — confirm against the package docs.
# The comment after each call is the author's recorded timing (presumably seconds).
@time findmax(x[1], y, Levenshtein())
|
||||
# 0.14
|
||||
@time findmax(x[1], y, DamerauLevenshtein())
|
||||
# 0.15
|
||||
|
||||
# Time `findall` with the first string of `x` as the query and the whole of `y`
# as the collection: first with the plain edit distances, then with
# `DamerauLevenshtein()` wrapped in `Partial`, `TokenSort`, `TokenSet` and `TokenMax`.
# NOTE(review): presumably this is the StringDistances method that returns all
# sufficiently similar elements of `y` — confirm against the package docs.
# The comment after each call is the author's recorded timing (presumably seconds).
@time findall(x[1], y, Levenshtein())
|
||||
# 0.06
|
||||
@time findall(x[1], y, DamerauLevenshtein())
|
||||
# 0.05
|
||||
@time findall(x[1], y, Partial(DamerauLevenshtein()))
|
||||
# 0.9
|
||||
|
||||
@time findall(x[1], y, TokenSort(DamerauLevenshtein()))
|
||||
# 0.27
|
||||
@time findall(x[1], y, TokenSet(DamerauLevenshtein()))
|
||||
# 0.74
|
||||
@time findall(x[1], y, TokenMax(DamerauLevenshtein()))
|
||||
# 2.25
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
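# Check: with `min_score`, scores below the threshold should become 0.0 and all other scores should be unchanged.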
|
||||
"""
    h(t, x, y; min_score = 1/3)

Return `true` when, for every index `i` of `x`, the thresholded score
`compare(x[i], y[i], t; min_score = min_score)` is consistent with the plain
score `compare(x[i], y[i], t)`:

- if the plain score is below `min_score`, the thresholded score must be `≈ 0.0`;
- otherwise the thresholded score must be `≈` the plain score.

Return `false` as soon as one index violates this.

# Arguments
- `t`: passed unchanged as the third positional argument of `compare`.
- `x`, `y`: indexable collections; `y` must be indexable at every index of `x`.

# Keywords
- `min_score`: threshold forwarded to `compare` (default `1/3`).
"""
function h(t, x, y; min_score = 1/3)
    for i in eachindex(x)
        # Compute each score once instead of re-evaluating `compare` per branch.
        plain = compare(x[i], y[i], t)
        thresholded = compare(x[i], y[i], t; min_score = min_score)
        consistent = if plain < min_score
            # With the default tolerances (`atol = 0`), `≈ 0.0` holds only for
            # an exact zero.
            thresholded ≈ 0.0
        else
            thresholded ≈ plain
        end
        # Stop at the first mismatch; no intermediate Bool array is needed.
        consistent || return false
    end
    return true
end
|
||||
# Run the consistency check `h` for both edit distances.
# NOTE(review): the Bool results are neither asserted nor printed, so they are
# only visible when these lines are evaluated interactively — confirm intent.
h(Levenshtein(), x, y)
|
||||
h(DamerauLevenshtein(), x, y)
|
||||
|
||||
|
||||
|
||||
#= R code (stringdist package) used for the timing comparison
|
||||
library(stringdist)
|
||||
x <- sapply(sample(5:25,5 * 1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
|
||||
y <- sapply(sample(5:25,5 * 1e5,replace=TRUE), function(n) paste(sample(letters,n,replace=TRUE),collapse=""))
|
||||
system.time(stringdist(x,y,method='lv', nthread = 1))
|
||||
system.time(stringdist(x,y,method='dl', nthread = 1))
|
||||
|
||||
# 0.472
|
||||
system.time(stringdist(x,y,method='jaccard', nthread = 1))
|
||||
# 0.739
|
||||
system.time(stringdist(x,y,method='cosine', nthread = 1))
|
||||
system.time(stringdist(x,y,method='qgram', nthread = 1))
|
||||
|
||||
=#
|
||||
|
Loading…
Reference in New Issue