66 lines
1.5 KiB
Julia
66 lines
1.5 KiB
Julia
using StringDistances, Random
|
|
using BenchmarkTools
|
|
|
|
"""
    intarg(args, index, default; lowest = typemin(Int)) -> Int

Return the integer found at position `index` of `args`.

`default` is returned instead when `args` has no entry at `index`, when the
entry does not parse as an `Int`, or when the parsed value is smaller than
`lowest` (the last case also logs a warning).
"""
function intarg(args::AbstractVector{<:AbstractString}, index::Integer, default::Int;
                lowest::Int = typemin(Int))
    # `get` covers the missing-argument case without a separate length check.
    raw = get(args, index, nothing)
    raw === nothing && return default
    # `tryparse` yields `nothing` on malformed input, so no try/catch is needed.
    value = tryparse(Int, raw)
    value === nothing && return default
    if value < lowest
        @warn "Ignoring command-line value below the supported minimum; using the default." value lowest default
        return default
    end
    return value
end

# Number of strings to benchmark with: first command-line argument, default 100.
N = intarg(ARGS, 1, 100)

# Upper bound on generated string length: second command-line argument,
# default 100. Lengths are drawn from 3:Maxlength, so anything below 3 would
# leave that range empty; such values fall back to the default.
Maxlength = intarg(ARGS, 2, 100; lowest = 3)
|
|
|
|
# If there are strings already cached to disk we start with them and only
|
|
# add new ones if needed.
|
|
using Serialization
|
|
# Location of the on-disk string cache; the file name includes Maxlength, so
# each maximum length gets its own cache file next to this script.
const CacheFile = joinpath(@__DIR__(), "perfteststrings_$(Maxlength).juliabin")

# Becomes true whenever S is generated or extended and therefore needs saving.
SaveCache = false

"""
    loadcachedstrings(path) -> Vector{String}

Deserialize the strings stored at `path`.

An empty vector is returned, and a warning logged, when the file cannot be
deserialized or does not contain a vector of strings, so the caller can
regenerate the data instead of failing later on.
"""
function loadcachedstrings(path::AbstractString)
    cached = try
        deserialize(path)
    catch err
        @warn "Could not read the string cache; starting from an empty set." path exception = (err, catch_backtrace())
        return String[]
    end
    if !(cached isa AbstractVector{<:AbstractString})
        @warn "String cache holds unexpected data; starting from an empty set." path type = typeof(cached)
        return String[]
    end
    println("Read $(length(cached)) strings from cache file: $path")
    # No-op for a Vector{String}; other string vectors are converted so that
    # later push! calls with String values are guaranteed to work.
    return convert(Vector{String}, cached)
end

# Start from the cached strings when a cache file exists; otherwise create N
# random strings with lengths drawn from 3:Maxlength and mark them for saving.
S = if isfile(CacheFile)
    loadcachedstrings(CacheFile)
else
    println("Creating $N random strings.")
    SaveCache = true
    String[randstring(rand(3:Maxlength)) for _ in 1:N]
end
|
|
|
|
# When fewer strings are available than requested, generate the missing ones
# and flag the cache for rewriting.
if N > length(S)
    # Top up one string at a time until the requested count is reached.
    while length(S) < N
        push!(S, randstring(rand(3:Maxlength)))
    end
    SaveCache = true
end
|
|
|
|
# Write the string set back to disk only when it was created or extended above.
if SaveCache
    println("Saving cache file with $(length(S)) strings: $CacheFile")
    open(CacheFile, "w") do io
        serialize(io, S)
    end
end
|
|
|
|
|
|
println("For ", Threads.nthreads(), " threads and ", N, " strings of max length ", Maxlength, ":")

dist = Cosine(2)

# The cache may hold more strings than were requested, so benchmark exactly the
# first N of them to match the count reported above.
benchstrings = S[1:clamp(N, 0, length(S))]

# `$` interpolation hands the values to BenchmarkTools directly, so the timings
# do not include the overhead of accessing untyped globals.
t1 = @belapsed pairwise($dist, $benchstrings; preprocess = false)
t2 = @belapsed pairwise($dist, $benchstrings; preprocess = true)

# Report both timings and their ratio.
println(" - time WITHOUT pre-calculation: ", round(t1, digits = 3))
println(" - time WITH pre-calculation: ", round(t2, digits = 3))
println(" - speedup with pre-calculation: ", round(t1/t2, digits = 1))
|