StringDistances.jl/src/StringDistances.jl

56 lines
1.7 KiB
Julia
Raw Normal View History

2015-10-22 18:12:44 +02:00
# Top-level module of the package: it brings the required names into scope,
# declares the public API, and pulls in the implementation files.
module StringDistances

##############################################################################
##
## Import / Export
##
##############################################################################

# Imported by name (rather than `using`) so that methods can be added to these
# functions without qualification.
# NOTE(review): presumably they are extended in the included files — confirm.
import Base: eltype, length, iterate, ==, hash, isless, convert, show

# `evaluate` and `Hamming` come from Distances and are re-exported below.
import Distances: evaluate, Hamming, hamming, PreMetric, SemiMetric
import IterTools: chain

export
    evaluate,
    compare,
    Hamming,
    Levenshtein,
    DamerauLevenshtein,
    Jaro,
    QGram,
    Cosine,
    Jaccard,
    SorensenDice,
    Overlap,
    RatcliffObershelp,
    Winkler,
    Partial,
    TokenSort,
    TokenSet,
    TokenMax

##############################################################################
##
## include
##
##############################################################################

# The include order is kept exactly as in the original file.
# NOTE(review): later files presumably rely on definitions from earlier ones
# (e.g. `utils.jl`) — confirm before reordering.
include("utils.jl")
include("distances/edit.jl")
include("distances/qgram.jl")
include("distances/RatcliffObershelp.jl")
include("compare.jl")

end
2019-08-14 16:30:22 +02:00
##############################################################################
##
## Some memo about Strings
# length: number of characters
# ncodeunits: Return the number of code units in a string (akin to the indices of a vector). Not all such indices are valid: they may not be the start of a character.
# sizeof: Size, in bytes, of the string str. Equal to the number of code units in str multiplied by the size, in bytes, of one code unit in str.
# lastindex: Return the last index of a collection
# nextind(s, i): return the index of the start of the character whose encoding starts after index i
# nextind(s, 0, N): return the index of the Nth character of s (or, if there are fewer than N characters, return ncodeunits(s) + (N - length(s)))
##############################################################################