default constructor
parent
0de5a28922
commit
0ea1828288
|
@ -6,7 +6,7 @@ y = map(randstring, rand(5:25,100_000))
|
|||
function f(out, t, x, y)
|
||||
d = Array(out, length(x))
|
||||
@inbounds for i in 1:length(x)
|
||||
d[i] = StringDistances.evaluate(t, x[i], y[i])
|
||||
d[i] = evaluate(t, x[i], y[i])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -18,7 +18,8 @@ end
|
|||
@time f(Float64, Jaccard(2), x, y)
|
||||
@time f(Float64, Cosine(2), x, y)
|
||||
@time f(Float64, Cosine(2), x, y)
|
||||
@time f(Int QGram(2), x, y)
|
||||
@time f(Int, QGram(2), x, y)
|
||||
|
||||
|
||||
|
||||
#= Rcode
|
||||
|
|
|
@ -9,22 +9,15 @@ module StringDistances
|
|||
##############################################################################
|
||||
|
||||
import Distances: evaluate, Hamming, hamming
|
||||
export evaluate,
|
||||
Hamming, hamming,
|
||||
Levenshtein, levenshtein,
|
||||
JaroWinkler, jaro_winkler, jaro,
|
||||
DamerauLevenshtein, damerau_levenshtein,
|
||||
QGram, qgram,
|
||||
Cosine, cosine,
|
||||
Jaccard, jaccard
|
||||
|
||||
export Hamming,
|
||||
Levenshtein,
|
||||
JaroWinkler,
|
||||
DamerauLevenshtein,
|
||||
QGram,
|
||||
Cosine,
|
||||
Jaccard,
|
||||
hamming,
|
||||
levenshtein,
|
||||
damerau_levenshtein,
|
||||
jaro_winkler,
|
||||
jaro,
|
||||
qgram,
|
||||
cosine,
|
||||
jaccard
|
||||
|
||||
|
||||
include("edit_distances.jl")
|
||||
|
|
|
@ -159,6 +159,7 @@ type JaroWinkler{T1 <: Number, T2 <: Number, T3 <: Integer}
|
|||
boosting_threshold::T2 # boost threshold. Default to 0.7
|
||||
long_threshold::T3 # long string adjustment. Default to 5
|
||||
end
|
||||
JaroWinkler() = JaroWinkler(0.1, 0.7, 5)
|
||||
|
||||
function evaluate{T}(dist::JaroWinkler, s1::T, s2::T)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
## q-gram is ∑ |v(s1, p) - v(s2, p)|
|
||||
##############################################################################
|
||||
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## A Bag is like a Set, except that it allows duplicated values
|
||||
|
@ -52,6 +51,8 @@ end
|
|||
type QGram{T <: Integer}
|
||||
q::T
|
||||
end
|
||||
QGram() = QGram(2)
|
||||
|
||||
|
||||
function evaluate{T}(dist::QGram, s1::T, s2::T)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
|
@ -80,6 +81,7 @@ qgram{T}(s1::T, s2::T; q = 2) = evaluate(QGram(q), s1, s2)
|
|||
type Cosine{T <: Integer}
|
||||
q::T
|
||||
end
|
||||
Cosine() = Cosine(2)
|
||||
|
||||
function evaluate{T}(dist::Cosine, s1::T, s2::T)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
|
@ -110,6 +112,7 @@ cosine{T}(s1::T, s2::T; q = 2) = evaluate(Cosine(q), s1, s2)
|
|||
type Jaccard{T <: Integer}
|
||||
q::T
|
||||
end
|
||||
Jaccard() = Jaccard(2)
|
||||
|
||||
function evaluate{T}(dist::Jaccard, s1::T, s2::T)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
|
@ -138,8 +141,4 @@ function evaluate{T}(dist::Jaccard, s1::T, s2::T)
|
|||
return 1.0 - n_intersect / (length(set1) + length(set2) - n_intersect)
|
||||
end
|
||||
|
||||
jaccard{T}(s1::T, s2::T; q = 2) = evaluate(Jaccard(q), s1, s2)
|
||||
|
||||
|
||||
|
||||
|
||||
jaccard{T}(s1::T, s2::T; q = 2) = evaluate(Jaccard(q), s1, s2)
|
Loading…
Reference in New Issue