better comments

pull/57/head
matthieugomez 2021-09-06 14:47:52 -04:00
parent e7585b56fb
commit c9483b6efa
2 changed files with 16 additions and 21 deletions

View File

@ -70,7 +70,7 @@ function (dist::Jaro)(s1, s2)
t += ch2 != ch1_match[i1]
end
end
return 1.0 - (m / len1 + m / len2 + (m - t/2) / m) / 3.0
return 1 - (m / len1 + m / len2 + (m - t/2) / m) / 3
end
@ -245,7 +245,7 @@ function (dist::RatcliffObershelp)(s1, s2)
s1, s2 = reorder(s1, s2)
n_matched = sum(last.(matching_blocks(s1, s2)))
len1, len2 = length(s1), length(s2)
len1 + len2 == 0 ? 0. : 1.0 - 2 * n_matched / (len1 + len2)
len1 + len2 == 0 ? 0.0 : 1 - 2 * n_matched / (len1 + len2)
end
function matching_blocks(s1, s2)

View File

@ -246,8 +246,7 @@ end
#==========================================================================
Preprocessing with Dictionary
We now define a special method for these special iterators
Special path for QGramDicts, iterators that store a dictionary associating qgrams to the number of their occurences
==========================================================================#
"""
@ -279,7 +278,8 @@ Base.iterate(s::QGramDict, state) = iterate(s.s, state)
function QGramDict(s, q::Integer = 2)
(s isa QGramDict) && (s.q == q) && return s
qgs = qgrams(s, q)
QGramDict{typeof(s), eltype(qgs)}(s, q, countdict(qgs))
countpairs = countdict(qgs)
QGramDict{typeof(s), eltype(qgs)}(s, q, countpairs)
end
# Turn a sequence of qgrams to a count dict for them, i.e. map each
@ -296,19 +296,19 @@ function countdict(qgrams)
@inbounds Base._setindex!(d, 1, qg, -index)
end
end
d
return d
end
function (dist::AbstractQGramDistance)(qc1::QGramDict, qc2::QGramDict)
dist.q == qc1.q == qc2.q || throw(ArgumentError("The distance and the QGramDict must have the same qgram length"))
counter = eval_start(dist)
d1, d2 = qc1.counts, qc2.counts
counter = eval_start(dist)
for (s1, n1) in d1
index = Base.ht_keyindex2!(d2, s1)
if index > 0
counter = eval_op(dist, counter, n1, d2.vals[index])
else
if index <= 0
counter = eval_op(dist, counter, n1, 0)
else
counter = eval_op(dist, counter, n1, d2.vals[index])
end
end
for (s2, n2) in d2
@ -322,8 +322,8 @@ end
#==========================================================================
Preprocessing with SortedVectors
We now define a special method for these special iterators
Special path for QGramSortedVectors, iterators that store a sorted vector associating qgrams to the number of their occurences
Constructing a QGramSortedVector requires elements of the initial iterator to have a natural order
==========================================================================#
"""
@ -366,15 +366,10 @@ function QGramSortedVector(s, q::Integer = 2)
QGramSortedVector{typeof(s), eltype(qgs)}(s, q, countpairs)
end
# To implement the distances we will count qgram matches
# between strings or pre-calculated AbstractQgramCounts objects.
# The abstract type defines different fallback versions which can be
# specialied by subtypes for best performance.
function (dist::AbstractQGramDistance)(qc1::QGramSortedVector, qc2::QGramSortedVector)
dist.q == qc1.q == qc2.q || throw(ArgumentError("The distance and the QGramSortedVectors must have the same qgram length"))
counter = eval_start(dist)
d1, d2 = qc1.counts, qc2.counts
counter = eval_start(dist)
i1 = i2 = 1
while true
# length can be zero
@ -392,13 +387,13 @@ function (dist::AbstractQGramDistance)(qc1::QGramSortedVector, qc2::QGramSortedV
@inbounds s1, n1 = d1[i1]
@inbounds s2, n2 = d2[i2]
cmpval = Base.cmp(s1, s2)
if cmpval == -1 # k1 < k2
if cmpval == -1 # s1 < s2
counter = eval_op(dist, counter, n1, 0)
i1 += 1
elseif cmpval == 1 # k2 < k1
elseif cmpval == 1 # s1 > s2
counter = eval_op(dist, counter, 0, n2)
i2 += 1
else
else # s1 == s2
counter = eval_op(dist, counter, n1, n2)
i1 += 1
i2 += 1