diff --git a/src/normalized.jl b/src/normalized.jl
index 623a3fc..cfc45fe 100644
--- a/src/normalized.jl
+++ b/src/normalized.jl
@@ -22,5 +22,13 @@ end
 
 function evaluate{T <: QGram}(normalized::Normalized{T}, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
     distance = evaluate(normalized.dist, s1, s2, len1, len2)
-    return distance / (max(0, len1 - normalized.dist.q + 1) + max(0, len2 - normalized.dist.q + 1))
+    q = normalized.dist.q
+    # Number of q-grams in both strings, clamping each count at zero for strings
+    # shorter than q so the denominator can never become negative.
+    total = max(0, len1 - q + 1) + max(0, len2 - q + 1)
+    # Neither string contains a q-gram: fall back to plain string equality.
+    if total == 0
+        return s1 == s2 ? 0.0 : 1.0
+    end
+    return distance / total
 end
\ No newline at end of file
diff --git a/src/qgram.jl b/src/qgram.jl
index 5baa55c..3b49b2d 100644
--- a/src/qgram.jl
+++ b/src/qgram.jl
@@ -116,7 +116,7 @@ function evaluate(dist::Cosine, s1::AbstractString, s2::AbstractString, len1::In
         numerator += v1 * get(bag2.dict, k, 0)
     end
     denominator = sqrt(sumabs2(values(bag1.dict))) * sqrt(sumabs2(values(bag2.dict)))
-    return denominator == 0 ? convert(Float64, 1 - (s1 == s2)) : 1.0 - numerator / denominator
+    return denominator != 0 ? 1.0 - numerator / denominator : (s1 == s2 ? 0.0 : 1.0)
 end
 
 cosine(s1::AbstractString, s2::AbstractString; q::Integer = 2) = evaluate(Cosine(q), s1::AbstractString, s2::AbstractString)
@@ -152,7 +152,7 @@ function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString, len1::I
         end
     end
     denominator = length(set1) + length(set2) - numerator
-    return denominator == 0 ? convert(Float64, 1 - (s1 == s2)) : 1.0 - numerator / denominator
+    return denominator != 0 ? 1.0 - numerator / denominator : (s1 == s2 ? 0.0 : 1.0)
 end
 
 jaccard(s1::AbstractString, s2::AbstractString; q::Integer = 2) = evaluate(Jaccard(q), s1::AbstractString, s2::AbstractString)
\ No newline at end of file