added doc strings and upped the dependency and CI to Julia 1.3

pull/36/head
Robert Feldt 2020-10-24 21:01:39 +02:00
parent cacbbc5487
commit 9d28c36ed5
3 changed files with 44 additions and 3 deletions

View File

@ -2,7 +2,7 @@ language: julia
os: os:
- linux - linux
julia: julia:
- 1.0 - 1.3
- 1.5 - 1.5
- nightly - nightly
matrix: matrix:

View File

@ -7,7 +7,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
[compat] [compat]
Distances = "0.8.1, 0.9, 0.10" Distances = "0.8.1, 0.9, 0.10"
julia = "1" julia = "1.3"
[extras] [extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

View File

@ -99,6 +99,24 @@ abstract type AbstractQGramCounts{Q,K} end
q(qc::AbstractQGramCounts{Q,K}) where {Q,K} = Q q(qc::AbstractQGramCounts{Q,K}) where {Q,K} = Q
counts(qc::AbstractQGramCounts) = qc.counts counts(qc::AbstractQGramCounts) = qc.counts
"""
QGramDict(s, q::Integer = 2)
Creates a QGramDict that pre-calculates (pre-counts) the qgrams
of a string or stream. This enables faster calculation of QGram
distances.
Note that the qgram length `q` used here must match the `q` of the
distance it is evaluated with.
## Examples
```julia
str1, str2 = "my string", "another string"
qd1 = QGramDict(str1, 2)
qd2 = QGramDict(str2, 2)
evaluate(Overlap(2), qd1, qd2)
```
"""
struct QGramDict{Q,K} <: AbstractQGramCounts{Q,K} struct QGramDict{Q,K} <: AbstractQGramCounts{Q,K}
counts::Dict{K,Int} counts::Dict{K,Int}
end end
@ -109,7 +127,30 @@ function QGramDict(s::Union{AbstractString, AbstractVector}, q::Integer = 2)
end end
QGramDict(s, q::Integer = 2) = QGramDict(collect(s), q) QGramDict(s, q::Integer = 2) = QGramDict(collect(s), q)
# Faster (than QgramDict) with the qgrams presorted """
QGramSortedVector(s, q::Integer = 2)
Creates a QGramSortedVector that pre-calculates (pre-counts) the
qgrams of a string or stream. This enables faster calculation of
QGram distances.
Since qgrams are sorted in lexicographic order, QGram distances can be
calculated even faster than when using a QGramDict. The trade-off is
that updating the counts after creation is less efficient.
Nevertheless, for most use cases QGramSortedVector is preferred
over a QGramDict.
Note that the qgram length `q` used here must match the `q` of the
distance it is evaluated with.
## Examples
```julia
str1, str2 = "my string", "another string"
qs1 = QGramSortedVector(str1, 2)
qs2 = QGramSortedVector(str2, 2)
evaluate(Jaccard(2), qs1, qs2)
```
"""
struct QGramSortedVector{Q,K} <: AbstractQGramCounts{Q,K} struct QGramSortedVector{Q,K} <: AbstractQGramCounts{Q,K}
counts::Vector{Pair{K,Int}} counts::Vector{Pair{K,Int}}
end end