indentation
parent
3b63093a85
commit
95093458c6
|
@ -15,7 +15,7 @@
|
|||
Support for ASCII, UTF-8 and Unicode
|
||||
|
||||
# Syntax
|
||||
There are two possible syntaxes for each distance:
|
||||
There are two possible syntaxes for any distance:
|
||||
```julia
|
||||
using StringDistances
|
||||
evaluate(Jaccard(2), "martha", "marhta")
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
count = 0
|
||||
|
||||
count = 0
|
||||
state2 = start(s2)
|
||||
for ch1 in s1
|
||||
ch2, state2 = next(s2, state2)
|
||||
|
@ -40,14 +40,13 @@ function common_prefix(s1::AbstractString, s2::AbstractString)
|
|||
end
|
||||
return start1, start2
|
||||
end
|
||||
|
||||
# Field-less type used as a dispatch tag: passing an instance as `dist`
# selects the `evaluate(dist::Levenshtein, s1, s2)` method.
type Levenshtein
end
|
||||
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
len2 == 0 && return 0
|
||||
|
||||
# find where the common prefix of s1 and s2 ends
|
||||
start1, start2 = common_prefix(s1, s2)
|
||||
done(s1, start1) && return len2
|
||||
|
||||
|
@ -90,19 +89,18 @@ end
|
|||
# Field-less type used as a dispatch tag: passing an instance as `dist`
# selects the `evaluate(dist::DamerauLevenshtein, s1, s2)` method.
type DamerauLevenshtein
end
|
||||
|
||||
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
length(s2) == 0 && return 0
|
||||
|
||||
# find where the common prefix of s1 and s2 ends
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
len2 == 0 && return 0
|
||||
|
||||
start1, start2 = common_prefix(s1, s2)
|
||||
done(s1, start1) && return len2
|
||||
|
||||
v0 = Array(Int, length(s2))
|
||||
v0 = Array(Int, len2)
|
||||
@inbounds for i2 in 1:len2
|
||||
v0[i2] = i2
|
||||
end
|
||||
v2 = Array(Int, length(s2))
|
||||
v2 = Array(Int, len2)
|
||||
|
||||
ch1, = next(s1, start1)
|
||||
current = 0
|
||||
|
|
|
@ -75,8 +75,10 @@ end
|
|||
# Zero-argument convenience constructor: forwards to the one-argument
# constructor with the value 2.
# NOTE(review): presumably this sets the `q` field that `evaluate` hands to
# QGramIterator; confirm against the type declaration.
function QGram()
    return QGram(2)
end
|
||||
|
||||
function evaluate(dist::QGram, s1::AbstractString, s2::AbstractString)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
length(s2) == 0 && return 0
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
len2 == 0 && return 0
|
||||
|
||||
q1 = QGramIterator(s1, dist.q)
|
||||
q2 = QGramIterator(s2, dist.q)
|
||||
bag = Bag(q2)
|
||||
|
@ -103,8 +105,10 @@ end
|
|||
# Zero-argument convenience constructor: forwards to the one-argument
# constructor with the value 2.
# NOTE(review): presumably this sets the `q` field that `evaluate` hands to
# QGramIterator; confirm against the type declaration.
function Cosine()
    return Cosine(2)
end
|
||||
|
||||
function evaluate(dist::Cosine, s1::AbstractString, s2::AbstractString)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
length(s2) == 0 && return 0.0
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
len2 == 0 && return 0.0
|
||||
|
||||
bag2 = Bag(QGramIterator(s2, dist.q))
|
||||
bag1 = Bag(QGramIterator(s1, dist.q))
|
||||
numerator = 0
|
||||
|
@ -132,8 +136,10 @@ end
|
|||
# Zero-argument convenience constructor: forwards to the one-argument
# constructor with the value 2.
# NOTE(review): presumably this sets the `q` field that `evaluate` hands to
# QGramIterator; confirm against the type declaration.
function Jaccard()
    return Jaccard(2)
end
|
||||
|
||||
function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString)
|
||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
||||
length(s2) == 0 && return 0.0
|
||||
len1, len2 = length(s1), length(s2)
|
||||
len1 > len2 && return evaluate(dist, s2, s1)
|
||||
len2 == 0 && return 0.0
|
||||
|
||||
set2 = Set(QGramIterator(s2, dist.q))
|
||||
set1 = Set(QGramIterator(s1, dist.q))
|
||||
numerator = 0
|
||||
|
|
Loading…
Reference in New Issue