store string length

tree
matthieugomez 2015-11-03 13:07:17 -05:00
parent 5b221e1682
commit 9e21ec1f33
2 changed files with 18 additions and 20 deletions

View File

@ -26,9 +26,7 @@ end
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
count = 0
state2 = start(s2)
for ch1 in s1
ch2, state2 = next(s2, state2)
for (ch1, ch2) in zip(s1, s2)
count += ch1 != ch2
end
count += len2 - len1

View File

@ -12,8 +12,9 @@ abstract AbstractQGram <: SemiMetric
##############################################################################
type QGramIterator{S <: AbstractString, T <: Integer}
s::S
q::T
s::S # string
l::Int # length of string
q::T # length of q-grams
end
function Base.start(qgram::QGramIterator)
len = length(qgram.s)
@ -30,17 +31,17 @@ function Base.done(qgram::QGramIterator, state)
done(qgram.s, idend)
end
# Element type reported for the iterator: a SubString parameterized by the
# concrete type of the wrapped string `s`.
function Base.eltype(qgram::QGramIterator)
    return SubString{typeof(qgram.s)}
end
Base.length(qgram::QGramIterator) = max(length(qgram.s) - qgram.q + 1, 0)
function Base.collect(qiter::QGramIterator)
x = Array(eltype(qiter), length(qiter))
# Number of q-grams, computed from the stored string length `l` rather than by
# re-measuring the string: l - q + 1, never negative.
function Base.length(qgram::QGramIterator)
    nqgrams = qgram.l - qgram.q + 1
    return max(nqgrams, 0)
end
function Base.collect(qgram::QGramIterator)
x = Array(eltype(qgram), length(qgram))
i = 0
for q in qiter
for q in qgram
i += 1
@inbounds x[i] = q
end
return x
end
Base.sort(qiter::QGramIterator) = sort!(collect(qiter), alg = QuickSort)
# Materialize the q-grams with `collect`, then sort that array in place with
# QuickSort and return it.
function Base.sort(qgram::QGramIterator)
    qgrams = collect(qgram)
    return sort!(qgrams, alg = QuickSort)
end
##############################################################################
##
@ -93,9 +94,9 @@ end
# Zero-argument constructor: forwards to QGram(2).
# NOTE(review): 2 is presumably the q-gram length read as `dist.q` — confirm
# against the type definition.
function QGram()
    return QGram(2)
end
function evaluate(dist::QGram, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
len2 == 0 && return 0
sort1 = sort(QGramIterator(s1, dist.q))
sort2 = sort(QGramIterator(s2, dist.q))
isempty(s2) && return 0
sort1 = sort(QGramIterator(s1, len1, dist.q))
sort2 = sort(QGramIterator(s2, len2, dist.q))
n = 0
for (n1, n2) in PairSortedIterator(sort1, sort2)
n += abs(n1 - n2)
@ -120,9 +121,9 @@ end
# Zero-argument constructor: forwards to Cosine(2).
# NOTE(review): 2 is presumably the q-gram length read as `dist.q` — confirm
# against the type definition.
function Cosine()
    return Cosine(2)
end
function evaluate(dist::Cosine, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
len2 == 0 && return 0
sort1 = sort(QGramIterator(s1, dist.q))
sort2 = sort(QGramIterator(s2, dist.q))
isempty(s2) && return 0
sort1 = sort(QGramIterator(s1, len1, dist.q))
sort2 = sort(QGramIterator(s2, len2, dist.q))
norm1, norm2, prodnorm = 0, 0, 0
for (n1, n2) in PairSortedIterator(sort1, sort2)
norm1 += n1^2
@ -154,10 +155,9 @@ end
# Zero-argument constructor: forwards to Jaccard(2).
# NOTE(review): 2 is presumably the q-gram length read as `dist.q` — confirm
# against the type definition.
function Jaccard()
    return Jaccard(2)
end
function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
len2 == 0 && return 0
sort1 = sort(QGramIterator(s1, dist.q))
sort2 = sort(QGramIterator(s2, dist.q))
isempty(s2) && return 0
sort1 = sort(QGramIterator(s1, len1, dist.q))
sort2 = sort(QGramIterator(s2, len2, dist.q))
ndistinct1, ndistinct2, nintersect = 0, 0, 0
for (n1, n2) in PairSortedIterator(sort1, sort2)
ndistinct1 += n1 > 0