pull/1/head
matthieugomez 2015-10-24 15:10:28 -04:00
parent 58e28d3511
commit 3b63093a85
2 changed files with 13 additions and 15 deletions

View File

@ -12,15 +12,13 @@
- [x] Cosine Distance
- [x] Jaccard Distance
Supports ASCII, UTF-8 and Unicode
Support for ASCII, UTF-8 and Unicode
Examples
# Syntax
There are two possible syntaxes for each distance:
```julia
using StringDistances
# Function-style calls: each takes the two strings to compare.
hamming("MARTHA", "MARHTA")
levenshtein("MARTHA", "MARHTA")
damerau_levenshtein("MARTHA", "MARHTA")
jaro("MARTHA", "MARHTA")
# Tuning parameters for jaro_winkler are passed as keyword arguments.
jaro_winkler("MARTHA", "MARHTA"; scaling_factor = 0.1, boosting_threshold = 0.7, long_threshold = 5)
# NOTE(review): the next two lines presumably show the same Jaccard computation
# in the two syntaxes mentioned above (a distance object passed to `evaluate`,
# and a function with a `q` keyword) - confirm that Jaccard(2) corresponds to q = 2.
evaluate(Jaccard(2), "martha", "marhta")
jaccard("martha", "marhta"; q = 2)
```

View File

@ -2,13 +2,13 @@
using StringDistances, Base.Test
# Jaro-Winkler checks. Each assertion compares the computed value with the
# expected one using an absolute tolerance of 1e-4. Expected values for matching
# pairs are written as `1 - x`; presumably x is the published similarity score
# and the package returns a distance - TODO confirm.
# This first group uses the keyword-argument function form.
@test_approx_eq_eps jaro_winkler("martha", "marhta", boosting_threshold = 0.0, long_threshold = 100) 1 - 0.9611 1e-4
@test_approx_eq_eps jaro_winkler("dwayne", "duane", boosting_threshold = 0.0, long_threshold = 100) 1 - 0.84 1e-4
@test_approx_eq_eps jaro_winkler("dixon", "dicksonx", boosting_threshold = 0.0, long_threshold = 100) 1 - 0.81333 1e-4
@test_approx_eq_eps jaro_winkler("william", "williams", boosting_threshold = 0.0, long_threshold = 100) 1 - 0.975 1e-4
# Edge cases: an empty string against a non-empty one, identical one-character
# strings, and two strings with no characters in common.
@test_approx_eq_eps jaro_winkler("", "foo", boosting_threshold = 0.0, long_threshold = 100) 1.0 1e-4
@test_approx_eq_eps jaro_winkler("a", "a", boosting_threshold = 0.0, long_threshold = 100) 0.0 1e-4
@test_approx_eq_eps jaro_winkler("abc", "xyz", boosting_threshold = 0.0, long_threshold = 100) 1.0 1e-4
# NOTE(review): the same seven cases follow in `evaluate(JaroWinkler(...), a, b)`
# form. This text looks like a rendered diff, so one of the two groups may be the
# pre-change version - confirm against the actual test file.
# The positional arguments (0.1, 0.0, 100) presumably map to scaling_factor,
# boosting_threshold and long_threshold, in that order - TODO confirm.
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "martha", "marhta") 1 - 0.9611 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "dwayne", "duane") 1 - 0.84 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "dixon", "dicksonx") 1 - 0.81333 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "william", "williams") 1 - 0.975 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "", "foo") 1.0 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "a", "a") 0.0 1e-4
@test_approx_eq_eps evaluate(JaroWinkler(0.1, 0.0, 100), "abc", "xyz") 1.0 1e-4
# Levenshtein: two empty strings are expected to be at distance 0.
@test evaluate(Levenshtein(), "", "") == 0
@ -20,7 +20,7 @@ using StringDistances, Base.Test
# The first string is the second one with the 4-character prefix "hi, " added.
@test evaluate(Levenshtein(), "hi, my name is", "my name is") == 4
# NOTE(review): the next two lines assert different values (8 and 6) for the same
# pair of strings, so they cannot both pass. They look like the old and new sides
# of a rendered diff. Counting characters, the strings are 8 and 7 long and share
# at most the subsequence "ao", which gives a distance of 6 - confirm which line
# is live in the actual test file.
@test evaluate(Levenshtein(), "alborgów", "amoniak") == 8
@test evaluate(Levenshtein(), "alborgów", "amoniak") == 6
# Damerau-Levenshtein: two empty strings are expected to be at distance 0.
@test evaluate(DamerauLevenshtein(), "", "") == 0