default constructor

pull/1/head
matthieugomez 2015-10-24 08:59:44 -04:00
parent 0de5a28922
commit 0ea1828288
4 changed files with 17 additions and 23 deletions

View File

@ -6,7 +6,7 @@ y = map(randstring, rand(5:25,100_000))
# Benchmark helper: allocates an array `d` with element type `out` and one slot
# per element of `x`, then stores the distance `t` evaluated on (x[i], y[i])
# at each index. `d` is not returned by this function.
# NOTE(review): @inbounds assumes y has at least length(x) elements — confirm.
function f(out, t, x, y)
d = Array(out, length(x))
@inbounds for i in 1:length(x)
# NOTE(review): the next two lines write the same slot; they look like the
# before/after sides of a diff hunk — confirm which one the file keeps.
d[i] = StringDistances.evaluate(t, x[i], y[i])
d[i] = evaluate(t, x[i], y[i])
end
end
@ -18,7 +18,8 @@ end
@time f(Float64, Jaccard(2), x, y)
@time f(Float64, Cosine(2), x, y)
@time f(Float64, Cosine(2), x, y)
@time f(Int QGram(2), x, y)
@time f(Int, QGram(2), x, y)
#= Rcode

View File

@ -9,22 +9,15 @@ module StringDistances
##############################################################################
import Distances: evaluate, Hamming, hamming
export evaluate,
Hamming, hamming,
Levenshtein, levenshtein,
JaroWinkler, jaro_winkler, jaro,
DamerauLevenshtein, damerau_levenshtein,
QGram, qgram,
Cosine, cosine,
Jaccard, jaccard
export Hamming,
Levenshtein,
JaroWinkler,
DamerauLevenshtein,
QGram,
Cosine,
Jaccard,
hamming,
levenshtein,
damerau_levenshtein,
jaro_winkler,
jaro,
qgram,
cosine,
jaccard
include("edit_distances.jl")

View File

@ -159,6 +159,7 @@ type JaroWinkler{T1 <: Number, T2 <: Number, T3 <: Integer}
boosting_threshold::T2 # boost threshold. Default to 0.7
long_threshold::T3 # long string adjustment. Default to 5
end
# Zero-argument constructor: builds a JaroWinkler with 0.1 for the first field
# (declared above this excerpt), boosting_threshold = 0.7 and long_threshold = 5,
# matching the defaults stated in the field comments.
JaroWinkler() = JaroWinkler(0.1, 0.7, 5)
function evaluate{T}(dist::JaroWinkler, s1::T, s2::T)
length(s1) > length(s2) && return evaluate(dist, s2, s1)

View File

@ -7,7 +7,6 @@
## q-gram is ∑ |v(s1, p) - v(s2, p)|
##############################################################################
##############################################################################
##
## A Bag is like a Set, except that it allows duplicate values
@ -52,6 +51,8 @@ end
# Distance type holding a single integer parameter `q`
# (presumably the q-gram length — confirm against evaluate(::QGram, ...)).
type QGram{T <: Integer}
q::T
end
# Zero-argument constructor: uses q = 2, the same default as the `qgram` keyword.
QGram() = QGram(2)
function evaluate{T}(dist::QGram, s1::T, s2::T)
length(s1) > length(s2) && return evaluate(dist, s2, s1)
@ -80,6 +81,7 @@ qgram{T}(s1::T, s2::T; q = 2) = evaluate(QGram(q), s1, s2)
# Distance type holding a single integer parameter `q`
# (presumably the q-gram length — confirm against evaluate(::Cosine, ...)).
type Cosine{T <: Integer}
q::T
end
# Zero-argument constructor: uses q = 2, the same default as the `cosine` keyword.
Cosine() = Cosine(2)
function evaluate{T}(dist::Cosine, s1::T, s2::T)
length(s1) > length(s2) && return evaluate(dist, s2, s1)
@ -110,6 +112,7 @@ cosine{T}(s1::T, s2::T; q = 2) = evaluate(Cosine(q), s1, s2)
# Distance type holding a single integer parameter `q`
# (presumably the q-gram length — confirm against evaluate(::Jaccard, ...)).
type Jaccard{T <: Integer}
q::T
end
# Zero-argument constructor: uses q = 2, the same default as the `jaccard` keyword.
Jaccard() = Jaccard(2)
function evaluate{T}(dist::Jaccard, s1::T, s2::T)
length(s1) > length(s2) && return evaluate(dist, s2, s1)
@ -138,8 +141,4 @@ function evaluate{T}(dist::Jaccard, s1::T, s2::T)
return 1.0 - n_intersect / (length(set1) + length(set2) - n_intersect)
end
# Convenience wrapper: builds Jaccard(q) and evaluates it on s1 and s2, which
# must share the same type T. The keyword q defaults to 2.
# NOTE(review): the definition appears twice with identical text; this looks
# like the before/after sides of a diff hunk — confirm only one is kept.
jaccard{T}(s1::T, s2::T; q = 2) = evaluate(Jaccard(q), s1, s2)
jaccard{T}(s1::T, s2::T; q = 2) = evaluate(Jaccard(q), s1, s2)