StringDistances.jl/src/StringDistances.jl

58 lines
1.7 KiB
Julia
Raw Normal View History

2015-10-22 18:12:44 +02:00
module StringDistances

##############################################################################
##
## Dependencies
##
##############################################################################

using DataStructures
# Base names listed here are brought into scope with `import`, which allows
# the included files to add methods to them.
import Base: eltype, length, iterate, ==, hash, isless, convert, show, @deprecate
# `evaluate` and `Hamming` come from Distances and are re-exported below.
import Distances: evaluate, Hamming, hamming, PreMetric, SemiMetric

##############################################################################
##
## Export
##
##############################################################################

export
    evaluate,
    compare,
    Hamming,
    Levenshtein,
    DamerauLevenshtein,
    Jaro,
    RatcliffObershelp,
    QGram,
    Cosine,
    Jaccard,
    SorensenDice,
    Overlap,
    Winkler,
    Partial,
    TokenSort,
    TokenSet,
    TokenMax,
    qgram,
    find_best,
    find_all

##############################################################################
##
## include
##
##############################################################################

# The include order below is kept exactly as in the original file.
include("utils.jl")
include("edit.jl")
include("qgram.jl")
include("compare.jl")
include("find.jl")

end
2019-08-14 16:30:22 +02:00
##############################################################################
##
## Some memo about Strings
# length: number of characters
# ncodeunits: return the number of code units in a string (akin to the indices of a vector). Not all such indices are valid: they may not be the start of a character.
# sizeof: Size, in bytes, of the string str. Equal to the number of code units in str multiplied by the size, in bytes, of one code unit in str.
# lastindex: Return the last index of a collection
# nextind(s, i): return the index of the start of the character whose encoding starts after index i
# nextind(s, 0, N): return the index of the Nth character of s (or, if there are fewer than N characters, return ncodeunits(s) + (N - length(s)))
##############################################################################