indentation
parent
3b63093a85
commit
95093458c6
|
@ -15,7 +15,7 @@
|
||||||
Support for ASCII, UTF-8 and Unicode
|
Support for ASCII, UTF-8 and Unicode
|
||||||
|
|
||||||
# Syntax
|
# Syntax
|
||||||
There are two possible syntaxes for each distance:
|
There are two possible syntaxes for any distance:
|
||||||
```julia
|
```julia
|
||||||
using StringDistances
|
using StringDistances
|
||||||
evaluate(Jaccard(2), "martha", "marhta")
|
evaluate(Jaccard(2), "martha", "marhta")
|
||||||
|
|
|
@ -8,8 +8,8 @@
|
||||||
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
len1 > len2 && return evaluate(dist, s2, s1)
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
count = 0
|
|
||||||
|
|
||||||
|
count = 0
|
||||||
state2 = start(s2)
|
state2 = start(s2)
|
||||||
for ch1 in s1
|
for ch1 in s1
|
||||||
ch2, state2 = next(s2, state2)
|
ch2, state2 = next(s2, state2)
|
||||||
|
@ -40,14 +40,13 @@ function common_prefix(s1::AbstractString, s2::AbstractString)
|
||||||
end
|
end
|
||||||
return start1, start2
|
return start1, start2
|
||||||
end
|
end
|
||||||
|
|
||||||
type Levenshtein end
|
type Levenshtein end
|
||||||
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString)
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
|
|
||||||
len1 > len2 && return evaluate(dist, s2, s1)
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
len2 == 0 && return 0
|
len2 == 0 && return 0
|
||||||
|
|
||||||
# common
|
|
||||||
start1, start2 = common_prefix(s1, s2)
|
start1, start2 = common_prefix(s1, s2)
|
||||||
done(s1, start1) && return len2
|
done(s1, start1) && return len2
|
||||||
|
|
||||||
|
@ -90,19 +89,18 @@ end
|
||||||
type DamerauLevenshtein end
|
type DamerauLevenshtein end
|
||||||
|
|
||||||
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString)
|
||||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
|
||||||
length(s2) == 0 && return 0
|
|
||||||
|
|
||||||
# common
|
|
||||||
len1, len2 = length(s1), length(s2)
|
len1, len2 = length(s1), length(s2)
|
||||||
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
|
len2 == 0 && return 0
|
||||||
|
|
||||||
start1, start2 = common_prefix(s1, s2)
|
start1, start2 = common_prefix(s1, s2)
|
||||||
done(s1, start1) && return len2
|
done(s1, start1) && return len2
|
||||||
|
|
||||||
v0 = Array(Int, length(s2))
|
v0 = Array(Int, len2)
|
||||||
@inbounds for i2 in 1:len2
|
@inbounds for i2 in 1:len2
|
||||||
v0[i2] = i2
|
v0[i2] = i2
|
||||||
end
|
end
|
||||||
v2 = Array(Int, length(s2))
|
v2 = Array(Int, len2)
|
||||||
|
|
||||||
ch1, = next(s1, start1)
|
ch1, = next(s1, start1)
|
||||||
current = 0
|
current = 0
|
||||||
|
|
|
@ -75,8 +75,10 @@ end
|
||||||
QGram() = QGram(2)
|
QGram() = QGram(2)
|
||||||
|
|
||||||
function evaluate(dist::QGram, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::QGram, s1::AbstractString, s2::AbstractString)
|
||||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
len1, len2 = length(s1), length(s2)
|
||||||
length(s2) == 0 && return 0
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
|
len2 == 0 && return 0
|
||||||
|
|
||||||
q1 = QGramIterator(s1, dist.q)
|
q1 = QGramIterator(s1, dist.q)
|
||||||
q2 = QGramIterator(s2, dist.q)
|
q2 = QGramIterator(s2, dist.q)
|
||||||
bag = Bag(q2)
|
bag = Bag(q2)
|
||||||
|
@ -103,8 +105,10 @@ end
|
||||||
Cosine() = Cosine(2)
|
Cosine() = Cosine(2)
|
||||||
|
|
||||||
function evaluate(dist::Cosine, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::Cosine, s1::AbstractString, s2::AbstractString)
|
||||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
len1, len2 = length(s1), length(s2)
|
||||||
length(s2) == 0 && return 0.0
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
|
len2 == 0 && return 0.0
|
||||||
|
|
||||||
bag2 = Bag(QGramIterator(s2, dist.q))
|
bag2 = Bag(QGramIterator(s2, dist.q))
|
||||||
bag1 = Bag(QGramIterator(s1, dist.q))
|
bag1 = Bag(QGramIterator(s1, dist.q))
|
||||||
numerator = 0
|
numerator = 0
|
||||||
|
@ -132,8 +136,10 @@ end
|
||||||
Jaccard() = Jaccard(2)
|
Jaccard() = Jaccard(2)
|
||||||
|
|
||||||
function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString)
|
function evaluate(dist::Jaccard, s1::AbstractString, s2::AbstractString)
|
||||||
length(s1) > length(s2) && return evaluate(dist, s2, s1)
|
len1, len2 = length(s1), length(s2)
|
||||||
length(s2) == 0 && return 0.0
|
len1 > len2 && return evaluate(dist, s2, s1)
|
||||||
|
len2 == 0 && return 0.0
|
||||||
|
|
||||||
set2 = Set(QGramIterator(s2, dist.q))
|
set2 = Set(QGramIterator(s2, dist.q))
|
||||||
set1 = Set(QGramIterator(s1, dist.q))
|
set1 = Set(QGramIterator(s1, dist.q))
|
||||||
numerator = 0
|
numerator = 0
|
||||||
|
|
Loading…
Reference in New Issue