added doc strings and upped the dependency and CI to Julia 1.3

pull/36/head
Robert Feldt 2020-10-24 21:01:39 +02:00
parent cacbbc5487
commit 9d28c36ed5
3 changed files with 44 additions and 3 deletions

View File

@ -2,7 +2,7 @@ language: julia
os:
- linux
julia:
- 1.0
- 1.3
- 1.5
- nightly
matrix:

View File

@ -7,7 +7,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
[compat]
Distances = "0.8.1, 0.9, 0.10"
julia = "1"
julia = "1.3"
[extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

View File

@ -99,6 +99,24 @@ abstract type AbstractQGramCounts{Q,K} end
# Return the first type parameter `Q` of a q-gram counts container
# (presumably the q-gram length; confirm against the constructors).
function q(qc::AbstractQGramCounts{Q,K}) where {Q,K}
    return Q
end
# Return the `counts` field of `qc`; this assumes every concrete subtype of
# AbstractQGramCounts stores its tallies in a field with that name.
function counts(qc::AbstractQGramCounts)
    return qc.counts
end
"""
    QGramDict(s, q::Integer = 2)

Create a `QGramDict` that pre-calculates (pre-counts) the q-grams
of a string or stream. This enables faster calculation of q-gram
distances.

Note that the q-gram length `q` must match the q-gram length used
in the distance.

## Examples
```julia
str1, str2 = "my string", "another string"
qd1 = QGramDict(str1, 2)
qd2 = QGramDict(str2, 2)
evaluate(Overlap(2), qd1, qd2)
```
"""
struct QGramDict{Q,K} <: AbstractQGramCounts{Q,K}
# Dictionary from keys of type `K` to `Int` tallies (presumably one entry per distinct q-gram).
counts::Dict{K,Int}
end
@ -109,7 +127,30 @@ function QGramDict(s::Union{AbstractString, AbstractVector}, q::Integer = 2)
end
# Fallback for inputs not handled by a more specific method: materialize `s`
# with `collect` and forward the result, together with `q`, to `QGramDict` again.
function QGramDict(s, q::Integer = 2)
    items = collect(s)
    return QGramDict(items, q)
end
# Faster (than QGramDict) with the qgrams presorted
"""
    QGramSortedVector(s, q::Integer = 2)

Create a `QGramSortedVector` that pre-calculates (pre-counts) the
q-grams of a string or stream. This enables faster calculation of
q-gram distances.

Because the q-grams are sorted in lexicographic order, q-gram distances
can be calculated even faster than with a `QGramDict`. The trade-off is
that updating the counts after creation is less efficient. Even so, for
most use cases `QGramSortedVector` is preferred over `QGramDict`.

Note that the q-gram length `q` must match the q-gram length used
in the distance.

## Examples
```julia
str1, str2 = "my string", "another string"
qs1 = QGramSortedVector(str1, 2)
qs2 = QGramSortedVector(str2, 2)
evaluate(Jaccard(2), qs1, qs2)
```
"""
struct QGramSortedVector{Q,K} <: AbstractQGramCounts{Q,K}
# Vector of `key => tally` pairs (`Pair{K,Int}`). The type's docstring says the q-grams are
# kept sorted; the sorting is presumably done by a constructor that is not visible here.
counts::Vector{Pair{K,Int}}
end