pull/1/head
matthieugomez 2015-10-26 09:38:09 -04:00
parent c9af07b993
commit 71f5042013
2 changed files with 22 additions and 9 deletions

View File

@ -14,11 +14,13 @@ end
@time f(Int, Levenshtein(), x, y)
@time f(Float64, Jaro(), x, y)
# all 5-10x slower
@time f(Float64, Jaccard(2), x, y)
@time f(Float64, Cosine(2), x, y)
@time f(Float64, Cosine(2), x, y)
# all 5-10x slower compared to StringDist
@time f(Int, QGram(2), x, y)
@time f(Float64, Cosine(2), x, y)
@time f(Float64, Jaccard(2), x, y)

View File

@ -33,14 +33,15 @@ Base.length(qgram::QGramIterator) = length(qgram.s) - qgram.q + 1
##############################################################################
# Multiset ("bag"): `dict` maps each value of type Tv to a count of integer type Ti.
# NOTE(review): two field declarations and two inner constructors appear below
# (one OrderedDict-backed, one Dict-backed). This looks like the removed and the
# added lines of a diff shown together -- TODO confirm which pair is current.
type Bag{Tv, Ti <: Integer}
dict::OrderedDict{Tv, Ti}
Bag() = new(OrderedDict{Tv, Ti}())
dict::Dict{Tv, Ti}
Bag() = new(Dict{Tv, Ti}())
end
# Record one more occurrence of `x` in `bag` and return the bag itself.
function Base.push!{Tv, Ti}(bag::Bag{Tv, Ti}, x::Tv)
    # A value that has not been seen yet starts from a count of zero.
    current = get(bag.dict, x, zero(Ti))
    bag.dict[x] = current + one(Ti)
    return bag
end
# Forward a capacity hint of `i` entries to the dictionary backing `bag`.
function Base.sizehint!(bag::Bag, i::Integer)
    return sizehint!(bag.dict, i)
end
function Base.delete!{Tv, Ti}(bag::Bag{Tv, Ti}, x)
v = get(bag.dict, x, zero(Ti))
@ -52,14 +53,24 @@ end
# Size of the bag counting multiplicity: the sum of all stored counts, as an Int.
function Base.length(bag::Bag)
    total = sum(values(bag.dict))
    return convert(Int, total)
end
function Bag(s)
# Build a bag with UInt counts holding every element produced by iterating `s`.
function Bag(s::QGramIterator)
    counts = Bag{eltype(s), UInt}()
    # Pre-size the backing dictionary from the iterator's reported length.
    sizehint!(counts, length(s))
    for gram in s
        push!(counts, gram)
    end
    return counts
end
# Collect the distinct elements produced by iterating `s` into a Set whose
# element type is `eltype(s)`.
function Base.Set(s::QGramIterator)
    distinct = Set{eltype(s)}()
    # Pre-size from the iterator's reported length before inserting.
    sizehint!(distinct, length(s))
    for gram in s
        push!(distinct, gram)
    end
    return distinct
end
##############################################################################
##
## q-gram
@ -143,8 +154,8 @@ function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString, len1::I
q1 = QGramIterator(s1, dist.q)
q2 = QGramIterator(s2, dist.q)
set2 = OrderedSet(q1)
set1 = OrderedSet(q2)
set2 = Set(q1)
set1 = Set(q2)
numerator = 0
for x in set1
if x in set2