rmv UInt8

pull/17/head
matthieugomez 2019-03-29 09:04:23 -04:00
parent e24e758289
commit a458b3356b
3 changed files with 22 additions and 18 deletions

View File

@ -1,8 +1,8 @@
using StringDistances
srand(2)
x = map(Base.randstring, rand(5:25,500_000))
y = map(Base.randstring, rand(5:25,500_000))
using StringDistances, Random
Random.seed!(2)
x = map(Random.randstring, rand(5:25,500_000))
y = map(Random.randstring, rand(5:25,500_000))
# Benchmark helper: call `evaluate(t, x[i], y[i])` for every index `i` of `x`
# and collect the results into an array.
#
# `t` is passed straight through as the first argument of `evaluate`;
# `x` and `y` are indexable collections, and `y` must be indexable at every
# index of `x`.
function f(t, x, y)
    # `eachindex(x)` rather than `1:length(x)` so the loop stays correct for
    # collections whose indices do not start at 1.
    return [evaluate(t, x[i], y[i]) for i in eachindex(x)]
end

View File

@ -5,6 +5,13 @@ module StringDistances
## Export
##
##############################################################################
# some memo
# length: number of characters
# ncodeunits: number of code units in a string (akin to the indices of a vector). Not all such indices are valid: an index may not be the start of a character.
# sizeof: Size, in bytes, of the string str. Equal to the number of code units in str multiplied by the size, in bytes, of one code unit in str.
import Base: eltype, length, iterate, ==, hash, isless, convert, show
import Distances: evaluate, Hamming, hamming, PreMetric, SemiMetric
import IterTools: chain

View File

@ -1,3 +1,4 @@
##############################################################################
##
## Define a type that iterates through q-grams of a string
@ -18,9 +19,9 @@ function Base.iterate(qgram::QGramIterator{S, N},
nextstate = nextind(qgram.s, istart), nextind(qgram.s, iend)
element, nextstate
end
Base.eltype(qgram::QGramIterator{S}) where {S} = S
Base.length(qgram::QGramIterator{S, N}) where {S, N} = max(qgram.l - N + 1, 0)
Base.eltype(qgram::QGramIterator) = String
##############################################################################
##
@ -36,35 +37,31 @@ end
# see setindex! in https://github.com/JuliaLang/julia/blob/master/base/dict.jl#L380
function CountIteratorDictionary(s1::QGramIterator{S1, N}, s2::QGramIterator{S2, N}) where {S1, S2, N}
K = eltype(s1)
d = Dict{K, NTuple{2, UInt8}}()
sizehint!(d, length(s1))
K = String
d = Dict{K, NTuple{2, Int}}()
sizehint!(d, length(s1) + length(s2))
for ch10 in s1
ch1 = convert(K, ch10)
if !isequal(ch1, ch10)
throw(ArgumentError("$(limitrepr(ch10)) is not a valid key for type $K"))
end
!isequal(ch1, ch10) && throw(ArgumentError("$(limitrepr(ch10)) is not a valid key for type $K"))
index = Base.ht_keyindex2!(d, ch1)
if index > 0
d.age += 1
@inbounds d.keys[index] = ch1
@inbounds d.vals[index] = (d.vals[index][1] + UInt8(1), UInt8(0))
@inbounds d.vals[index] = (d.vals[index][1] + 1, 0)
else
Base._setindex!(d, (UInt8(1), UInt8(0)), ch1, -index)
Base._setindex!(d, (1, 0), ch1, -index)
end
end
for ch20 in s2
ch2 = convert(K, ch20)
if !isequal(ch2, ch20)
throw(ArgumentError("$(limitrepr(ch20)) is not a valid key for type $K"))
end
!isequal(ch2, ch20) && throw(ArgumentError("$(limitrepr(ch20)) is not a valid key for type $K"))
index = Base.ht_keyindex2!(d, ch2)
if index > 0
d.age += 1
@inbounds d.keys[index] = ch2
@inbounds d.vals[index] = (d.vals[index][1], d.vals[index][2] + UInt8(1))
@inbounds d.vals[index] = (d.vals[index][1], d.vals[index][2] + 1)
else
Base._setindex!(d, (UInt8(0), UInt8(1)), ch2, -index)
Base._setindex!(d, (0, 1), ch2, -index)
end
end
return values(d)