redefine graphemeiterator
parent
d8ac44be23
commit
0a2d96a848
|
@ -7,7 +7,8 @@ module StringDistances
|
|||
## Export
|
||||
##
|
||||
##############################################################################
|
||||
import Base.UTF8proc.GraphemeIterator
|
||||
import Base: eltype, length, start, done, next, ==, hash, isless, convert, show, endof
|
||||
import Base.UTF8proc: isgraphemebreak
|
||||
import Distances: evaluate, Hamming, hamming, PreMetric, SemiMetric
|
||||
import Iterators: chain
|
||||
export
|
||||
|
@ -29,57 +30,78 @@ Winkler,
|
|||
Partial,
|
||||
TokenSort,
|
||||
TokenSet,
|
||||
TokenMax
|
||||
TokenMax,
|
||||
graphemeiterator
|
||||
##############################################################################
|
||||
##
|
||||
## Define 2 iterators. One on character one on Grapheme.
|
||||
## Define GraphemeIterator as AbstractString
|
||||
##
|
||||
## Argument against inheritance: iterating an AbstractString is generally expected to yield individual Chars.
|
||||
## Argument for inheritance: prevind, nextind, chr2ind are defined once start, next, done, isvalid, endof are defined. Also this allows to define functions with AbstractString signature & having SubString work.
|
||||
##############################################################################
|
||||
|
||||
immutable CharacterIterator{T <: AbstractString}
|
||||
s::T
|
||||
# from Base. I redefine it because I want AbstractString inheritance
|
||||
immutable GraphemeIterator{S<:AbstractString} <: AbstractString
|
||||
s::S # original string (for generation of SubStrings)
|
||||
end
|
||||
Base.start(x::CharacterIterator) = start(x.s)
|
||||
Base.next(x::CharacterIterator, i::Integer) = next(x.s, i)
|
||||
Base.done(x::CharacterIterator, i::Integer) = done(x.s, i)
|
||||
Base.length(x::CharacterIterator) = length(x.s)
|
||||
graphemeiterator(s::AbstractString) = GraphemeIterator{typeof(s)}(s)
|
||||
eltype{S}(::Type{GraphemeIterator{S}}) = SubString{S}
|
||||
# Number of graphemes in `g`, obtained by counting the grapheme breaks found
# while walking the characters of the wrapped string `g.s`.
function length(g::GraphemeIterator)
    # Seed the "previous character" with a soft hyphen: a grapheme break is
    # always allowed after it, so the first real character is counted.
    previous = Char(0x00ad)
    count = 0
    for current in g.s
        if isgraphemebreak(previous, current)
            count += 1
        end
        previous = current
    end
    return count
end
|
||||
start(g::GraphemeIterator) = start(g.s)
|
||||
done(g::GraphemeIterator, i::Int) = done(g.s, i)
|
||||
# Iteration step: starting at index `i` of the wrapped string, return the
# grapheme beginning there (as a SubString of `g.s`) together with the index
# at which the following grapheme starts.
function next(g::GraphemeIterator, i::Int)
    str = g.s
    last = i                          # index of the last character kept in this grapheme
    prevchar, upcoming = next(str, i)
    # Extend the grapheme one character at a time until a break is found
    # or the underlying string is exhausted.
    while !done(str, upcoming)
        curchar, after = next(str, upcoming)
        if isgraphemebreak(prevchar, curchar)
            break
        end
        last = upcoming
        upcoming = after
        prevchar = curchar
    end
    return (SubString(str, i, last), upcoming)
end
|
||||
==(g1::GraphemeIterator, g2::GraphemeIterator) = g1.s == g2.s
|
||||
hash(g::GraphemeIterator, h::UInt) = hash(g.s, h)
|
||||
isless(g1::GraphemeIterator, g2::GraphemeIterator) = isless(g1.s, g2.s)
|
||||
show{S}(io::IO, g::GraphemeIterator{S}) = print(io, "length-$(length(g)) GraphemeIterator{$S} for \"$(g.s)\"")
|
||||
|
||||
Base.nextind(x::CharacterIterator, i::Integer) = nextind(x.s, i)
|
||||
Base.chr2ind(x::CharacterIterator, i::Integer) = chr2ind(x.s, i::Integer)
|
||||
iteratortype{T <: CharacterIterator}(::Type{T}) = CharacterIterator
|
||||
Base.convert{T}(::Type{CharacterIterator{T}}, x::T) = CharacterIterator(x)
|
||||
|
||||
# add the following methods
|
||||
Base.nextind(g::GraphemeIterator, state::Integer) = next(g, state)[2]
|
||||
function Base.chr2ind(s::GraphemeIterator, i::Integer)
|
||||
i < start(s) && throw(BoundsError(s.s, i))
|
||||
j = 1
|
||||
k = start(s)
|
||||
while true
|
||||
c, l = next(s,k)
|
||||
if i == j
|
||||
return k
|
||||
end
|
||||
j += 1
|
||||
k = l
|
||||
# added
|
||||
#used in prevind nextind
|
||||
# An index is valid for a GraphemeIterator when it is a valid index of the
# wrapped string AND a grapheme starts there, i.e. it is either the first
# index or there is a grapheme break between the previous character and the
# character at `i`.
function Base.isvalid(s::GraphemeIterator, i::Integer)
    str = s.s
    # Not even a character boundary in the underlying string.
    isvalid(str, i) || return false
    before = prevind(str, i)
    # No previous character: `i` is the very first index.
    before < start(str) && return true
    return isgraphemebreak(str[before], str[i])
end
|
||||
iteratortype{T <: GraphemeIterator}(::Type{T}) = GraphemeIterator
|
||||
Base.convert{T}(::Type{GraphemeIterator{T}}, x::T) = GraphemeIterator(x)
|
||||
# Last valid index of a GraphemeIterator: the index in the wrapped string at
# which the final grapheme starts. Returns `endof(s.s)` unchanged when the
# wrapped string is empty or has a single character.
#
# Starting from the last character of `s.s`, step backwards for as long as
# there is NO grapheme break between the previous character and the current
# one (the same test `isvalid` uses). The loop stops before ever indexing
# below `start(s.s)`.
function endof(s::GraphemeIterator)
    str = s.s
    i = endof(str)
    first = start(str)
    while i > first
        before = prevind(str, i)
        # A break between str[before] and str[i] means a grapheme starts at i.
        isgraphemebreak(str[before], str[i]) && break
        i = before
    end
    return i
end
|
||||
|
||||
|
||||
typealias StringIterator{T} Union{GraphemeIterator{T}, CharacterIterator{T}}
|
||||
Base.endof(g::StringIterator) = endof(g.s)
|
||||
Base.SubString(x::StringIterator, i, j) = iteratortype(x)(SubString(x.s, i, j))
|
||||
Base.string(x::StringIterator) = x.s
|
||||
Base.isempty(x::StringIterator) = isempty(x.s)
|
||||
Base.eltype{T}(x::StringIterator{T}) = T
|
||||
Base.isless(x::StringIterator, y::StringIterator) = isless(x.s, y.s)
|
||||
Base.search(x::StringIterator, args...) = search(x.s, args...)
|
||||
Base.searchsortedfirst(x::StringIterator, args...) = searchsortedfirst(x.s, args...)
|
||||
Base.searchsortedlast(x::StringIterator, args...) = searchsortedlast(x.s, args...)
|
||||
Base.searchsorted(x::StringIterator, args...) = searchsorted(x.s, args...)
|
||||
iteratortype(x::StringIterator) = iteratortype(typeof(x))
|
||||
# 1. issues with stuff like search or print_escaped where character iteration vs string iteration matters. I need to pass the original string for now
|
||||
Base.search(x::GraphemeIterator, s::Vector{Char}) = search(x.s, s)
|
||||
# 2. issue with keeping the iterator property for stuff like split, join. For now, I decide to lose the enumerator property but add it back after join. But SubString, for instance, does not lose the property
|
||||
Base.split(x::GraphemeIterator, args...) = split(x.s, args...)
|
||||
iterator{T <: GraphemeIterator}(::Type{T}, x::AbstractString) = graphemeiterator(x)
|
||||
iterator{T <: AbstractString}(::Type{T}, x::AbstractString) = x
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
|
@ -101,14 +123,6 @@ include("modifiers/fuzzywuzzy.jl")
|
|||
for x in (:evaluate, :compare)
|
||||
@eval begin
|
||||
function $x(dist::PreMetric, s1::AbstractString, s2::AbstractString)
|
||||
$x(dist, CharacterIterator(s1), CharacterIterator(s2))
|
||||
end
|
||||
|
||||
function $x(dist::PreMetric, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
$x(dist, CharacterIterator(s1), CharacterIterator(s2), len1, len2)
|
||||
end
|
||||
|
||||
function $x(dist::PreMetric, s1::StringIterator, s2::StringIterator)
|
||||
len1, len2 = length(s1), length(s2)
|
||||
if len1 > len2
|
||||
return $x(dist, s2, s1, len2, len1)
|
||||
|
@ -125,13 +139,13 @@ end
|
|||
##
|
||||
##############################################################################
|
||||
|
||||
function compare(dist::PreMetric, s1::StringIterator, s2::StringIterator,
|
||||
function compare(dist::PreMetric, s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
1.0 - evaluate(dist, s1, s2, len1, len2)
|
||||
end
|
||||
|
||||
function compare(dist::Union{Hamming, Levenshtein, DamerauLevenshtein},
|
||||
s1::StringIterator, s2::StringIterator,
|
||||
s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
distance = evaluate(dist, s1, s2, len1, len2)
|
||||
len2 == 0 ? 1.0 : 1.0 - distance / len2
|
||||
|
@ -140,14 +154,14 @@ end
|
|||
# compare always return a value between 0 and 1.
|
||||
# When string length < q for qgram distance, returns s1 == s2
|
||||
function compare(dist::AbstractQGram,
|
||||
s1::StringIterator, s2::StringIterator,
|
||||
s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
||||
evaluate(dist, s1, s2, len1, len2)
|
||||
end
|
||||
|
||||
function compare(dist::QGram,
|
||||
s1::StringIterator, s2::StringIterator,
|
||||
s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
len1 <= (dist.q - 1) && return convert(Float64, s1 == s2)
|
||||
distance = evaluate(dist, s1, s2, len1, len2)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Return start of common substring in s1, start of common substring in s2, and length of substring
|
||||
# Indexes refer to character number, not index (differ for Unicode strings)
|
||||
function longest_common_substring(s1::StringIterator, s2::StringIterator)
|
||||
function longest_common_substring(s1::AbstractString, s2::AbstractString)
|
||||
if length(s1) > length(s2)
|
||||
start2, start1, size= longest_common_substring(s2, s1)
|
||||
else
|
||||
|
@ -28,7 +28,7 @@ function longest_common_substring(s1::StringIterator, s2::StringIterator)
|
|||
return start1, start2, size
|
||||
end
|
||||
|
||||
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::StringIterator, s2::StringIterator, start1::Integer, start2::Integer)
|
||||
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractString, s2::AbstractString, start1::Integer, start2::Integer)
|
||||
a = longest_common_substring(s1, s2)
|
||||
if a[3] > 0
|
||||
push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))
|
||||
|
@ -43,14 +43,14 @@ function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::StringIterator, s2::
|
|||
end
|
||||
end
|
||||
|
||||
function matching_blocks(s1::StringIterator, s2::StringIterator)
|
||||
function matching_blocks(s1::AbstractString, s2::AbstractString)
|
||||
x = Set{Tuple{Int, Int, Int}}()
|
||||
matching_blocks!(x, s1, s2, 1, 1)
|
||||
return x
|
||||
end
|
||||
|
||||
type RatcliffObershelp <: PreMetric end
|
||||
function evaluate(dist::RatcliffObershelp, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
result = matching_blocks(s1, s2)
|
||||
matched = 0
|
||||
for x in result
|
||||
|
@ -59,7 +59,7 @@ function evaluate(dist::RatcliffObershelp, s1::StringIterator, s2::StringIterato
|
|||
1.0 - 2 * matched / (len1 + len2)
|
||||
end
|
||||
|
||||
#function buildref(s::StringIterator, len)
|
||||
#function buildref(s::AbstractString, len)
|
||||
# ref = Array(Int, len)
|
||||
# state = start(s)
|
||||
# i = 0
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
## Assumes length(s1) <= length(s2)
|
||||
##############################################################################
|
||||
|
||||
function common_prefix(s1::StringIterator, s2::StringIterator, lim::Integer = -1)
|
||||
function common_prefix(s1::AbstractString, s2::AbstractString, lim::Integer = -1)
|
||||
start1 = start(s1)
|
||||
start2 = start(s2)
|
||||
l = 0
|
||||
|
@ -24,7 +24,7 @@ end
|
|||
##
|
||||
##############################################################################
|
||||
|
||||
function evaluate(dist::Hamming, s1::StringIterator, s2::StringIterator, len1::Integer, len2:: Integer)
|
||||
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString, len1::Integer, len2:: Integer)
|
||||
count = 0
|
||||
for (ch1, ch2) in zip(s1, s2)
|
||||
count += ch1 != ch2
|
||||
|
@ -42,7 +42,7 @@ end
|
|||
|
||||
|
||||
type Levenshtein <: SemiMetric end
|
||||
function evaluate(dist::Levenshtein, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
|
||||
# prefix common to both strings can be ignored
|
||||
k, start1, start2 = common_prefix(s1, s2)
|
||||
|
@ -90,7 +90,7 @@ end
|
|||
|
||||
type DamerauLevenshtein <: SemiMetric end
|
||||
|
||||
function evaluate(dist::DamerauLevenshtein, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
|
||||
# prefix common to both strings can be ignored
|
||||
k, start1, start2 = common_prefix(s1, s2)
|
||||
|
@ -158,7 +158,7 @@ end
|
|||
|
||||
type Jaro <: SemiMetric end
|
||||
|
||||
function evaluate(dist::Jaro, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
# if len2 == 0, m = 0 so should be 1.0 according to wikipedia. Nope.
|
||||
len2 == 0 && return 0.0
|
||||
|
||||
|
@ -199,4 +199,4 @@ function evaluate(dist::Jaro, s1::StringIterator, s2::StringIterator, len1::Inte
|
|||
return 1.0 - score
|
||||
end
|
||||
|
||||
jaro(s1::StringIterator, s2::StringIterator) = evaluate(Jaro(), s1, s2)
|
||||
jaro(s1::AbstractString, s2::AbstractString) = evaluate(Jaro(), s1, s2)
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
##
|
||||
##############################################################################
|
||||
|
||||
immutable QGramIterator{S <: StringIterator, T <: Integer}
|
||||
immutable QGramIterator{S <: AbstractString, T <: Integer}
|
||||
s::S # grapheorstring
|
||||
l::Int # length of string
|
||||
q::T # length of q-grams
|
||||
|
@ -23,7 +23,7 @@ function Base.done(qgram::QGramIterator, state)
|
|||
istart, idend = state
|
||||
done(qgram.s, idend)
|
||||
end
|
||||
Base.eltype(qgram::QGramIterator) = iteratortype(qgram.s){SubString{eltype(qgram.s)}}
|
||||
Base.eltype(qgram::QGramIterator) = SubString{typeof(qgram.s)}
|
||||
Base.length(qgram::QGramIterator) = max(qgram.l - qgram.q + 1, 0)
|
||||
function Base.collect(qgram::QGramIterator)
|
||||
x = Array(eltype(qgram), length(qgram))
|
||||
|
@ -80,7 +80,7 @@ end
|
|||
##############################################################################
|
||||
abstract AbstractQGram <: SemiMetric
|
||||
|
||||
function evaluate(dist::AbstractQGram, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function evaluate(dist::AbstractQGram, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
sort1 = sort(QGramIterator(s1, len1, dist.q))
|
||||
sort2 = sort(QGramIterator(s2, len2, dist.q))
|
||||
evaluate(dist, CountInterator(sort1, sort2))
|
||||
|
|
|
@ -9,7 +9,7 @@ type Partial{T <: PreMetric} <: PreMetric
|
|||
end
|
||||
|
||||
# general
|
||||
function compare(dist::Partial, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function compare(dist::Partial, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
||||
len1 == 0 && return compare(dist.dist, "", "", 0, 0)
|
||||
iter = QGramIterator(s2, len2, len1)
|
||||
|
@ -26,7 +26,7 @@ end
|
|||
|
||||
# Specialization for RatcliffObershelp distance
|
||||
# Code follows https://github.com/seatgeek/fuzzywuzzy/blob/master/fuzzywuzzy/fuzz.py
|
||||
function compare(dist::Partial{RatcliffObershelp}, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function compare(dist::Partial{RatcliffObershelp}, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
len1 == len2 && return compare(dist.dist, s1, s2, len1, len2)
|
||||
out = 0.0
|
||||
result = matching_blocks(s1, s2)
|
||||
|
@ -59,14 +59,15 @@ type TokenSort{T <: PreMetric} <: PreMetric
|
|||
dist::T
|
||||
end
|
||||
|
||||
function compare(dist::TokenSort, s1::StringIterator, s2::StringIterator,
|
||||
function compare(dist::TokenSort, s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
if search(s1.s, Base._default_delims) > 0
|
||||
s1 = iteratortype(s1)(join(sort!(split(s1.s)), " "))
|
||||
if search(s1, Base._default_delims) > 0
|
||||
s1 = iterator(typeof(s1), join(sort!(split(s1)), " "))
|
||||
end
|
||||
if search(s2.s, Base._default_delims) > 0
|
||||
s2 = iteratortype(s2)(join(sort!(split(s2.s)), " "))
|
||||
if search(s2, Base._default_delims) > 0
|
||||
s2 = iterator(typeof(s2), join(sort!(split(s2)), " "))
|
||||
end
|
||||
@show s1, s2
|
||||
compare(dist.dist, s1, s2)
|
||||
end
|
||||
|
||||
|
@ -80,12 +81,12 @@ type TokenSet{T <: PreMetric} <: PreMetric
|
|||
dist::T
|
||||
end
|
||||
|
||||
function compare(dist::TokenSet, s1::StringIterator, s2::StringIterator,
|
||||
function compare(dist::TokenSet, s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
v0, v1, v2 = _separate!(split(s1.s), split(s2.s))
|
||||
s0 = iteratortype(s1)(join(v0, " "))
|
||||
s1 = iteratortype(s1)(join(chain(v0, v1), " "))
|
||||
s2 = iteratortype(s1)(join(chain(v0, v2), " "))
|
||||
v0, v1, v2 = _separate!(split(s1), split(s2))
|
||||
s0 = iterator(typeof(s1), join(v0, " "))
|
||||
s1 = iterator(typeof(s1), join(chain(v0, v1), " "))
|
||||
s2 = iterator(typeof(s2), join(chain(v0, v2), " "))
|
||||
if isempty(s0)
|
||||
# otherwise compare(dist, "", "a")== 1.0
|
||||
compare(dist.dist, s1, s2)
|
||||
|
@ -128,7 +129,7 @@ type TokenMax{T <: PreMetric} <: PreMetric
|
|||
dist::T
|
||||
end
|
||||
|
||||
function compare(dist::TokenMax, s1::StringIterator, s2::StringIterator,
|
||||
function compare(dist::TokenMax, s1::AbstractString, s2::AbstractString,
|
||||
len1::Integer, len2::Integer)
|
||||
base = compare(dist.dist, s1, s2, len1, len2)
|
||||
unbase_scale = 0.95
|
||||
|
|
|
@ -13,7 +13,7 @@ end
|
|||
# restrict to distance between 0 and 1
|
||||
Winkler(x) = Winkler(x, 0.1, 0.7)
|
||||
|
||||
function compare(dist::Winkler, s1::StringIterator, s2::StringIterator, len1::Integer, len2::Integer)
|
||||
function compare(dist::Winkler, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer)
|
||||
score = compare(dist.dist, s1, s2, len1, len2)
|
||||
l = common_prefix(s1, s2, 4)[1]
|
||||
# common prefix adjustment
|
||||
|
|
|
@ -122,22 +122,22 @@ stringdist(strings[1,], strings[2,], method = "qgram", q = 1)
|
|||
|
||||
# grapheme
|
||||
strings = [
|
||||
(graphemes("martha"), graphemes("marhta")),
|
||||
(graphemes("dwayne"), graphemes("duane") ),
|
||||
(graphemes("dixon"), graphemes("dicksonx")),
|
||||
(graphemes("william"), graphemes("williams")),
|
||||
(graphemes(""), graphemes("foo")),
|
||||
(graphemes("a"), graphemes("a")),
|
||||
(graphemes("abc"), graphemes("xyz")),
|
||||
(graphemes("abc"), graphemes("ccc")),
|
||||
(graphemes("kitten"), graphemes("sitting")),
|
||||
(graphemes("saturday"), graphemes("sunday")),
|
||||
(graphemes("hi, my name is"), graphemes("my name is")),
|
||||
(graphemes("alborgów"), graphemes("amoniak")),
|
||||
(graphemes("cape sand recycling "), graphemes("edith ann graham")),
|
||||
(graphemes( "jellyifhs"), graphemes("jellyfish")),
|
||||
(graphemes("ifhs"), graphemes("fish")),
|
||||
(graphemes("leia"), graphemes("leela")),
|
||||
(graphemeiterator("martha"), graphemeiterator("marhta")),
|
||||
(graphemeiterator("dwayne"), graphemeiterator("duane") ),
|
||||
(graphemeiterator("dixon"), graphemeiterator("dicksonx")),
|
||||
(graphemeiterator("william"), graphemeiterator("williams")),
|
||||
(graphemeiterator(""), graphemeiterator("foo")),
|
||||
(graphemeiterator("a"), graphemeiterator("a")),
|
||||
(graphemeiterator("abc"), graphemeiterator("xyz")),
|
||||
(graphemeiterator("abc"), graphemeiterator("ccc")),
|
||||
(graphemeiterator("kitten"), graphemeiterator("sitting")),
|
||||
(graphemeiterator("saturday"), graphemeiterator("sunday")),
|
||||
(graphemeiterator("hi, my name is"), graphemeiterator("my name is")),
|
||||
(graphemeiterator("alborgów"), graphemeiterator("amoniak")),
|
||||
(graphemeiterator("cape sand recycling "), graphemeiterator("edith ann graham")),
|
||||
(graphemeiterator( "jellyifhs"), graphemeiterator("jellyfish")),
|
||||
(graphemeiterator("ifhs"), graphemeiterator("fish")),
|
||||
(graphemeiterator("leia"), graphemeiterator("leela")),
|
||||
]
|
||||
|
||||
|
||||
|
@ -149,4 +149,4 @@ for x in solutions
|
|||
end
|
||||
|
||||
@test evaluate(Hamming(), "b\u0300", "a") == 2
|
||||
@test evaluate(Hamming(), graphemes("b\u0300"), graphemes("a")) == 1
|
||||
@test evaluate(Hamming(), graphemeiterator("b\u0300"), graphemeiterator("a")) == 1
|
|
@ -70,3 +70,13 @@ s = "HSINCHUANG"
|
|||
|
||||
|
||||
@test_approx_eq compare(TokenMax(RatcliffObershelp()),"mariners vs angels", "") 0.0
|
||||
|
||||
|
||||
|
||||
@test_approx_eq compare(TokenSort(RatcliffObershelp()), graphemeiterator("New York Mets vs Atlanta Braves"), graphemeiterator("Atlanta Braves vs New York Mets")) 1.0
|
||||
@test_approx_eq compare(TokenSet(RatcliffObershelp()),graphemeiterator("mariners vs angels"), graphemeiterator("los angeles angels of anaheim at seattle mariners")) 1.0 - 0.09090909090909094
|
||||
@test_approx_eq compare(TokenSort(RatcliffObershelp()), graphemeiterator("New York Mets vs Atlanta Braves"), graphemeiterator("")) 0.0
|
||||
@test_approx_eq compare(TokenSet(RatcliffObershelp()),graphemeiterator("mariners vs angels"), graphemeiterator("")) 0.0
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue