StringDistances.jl/src/edit.jl


##############################################################################
##
## Hamming
##
##############################################################################
"""
    evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString; max_dist = typemax(Int))

Return the number of positions at which `s1` and `s2` hold different characters,
plus the difference between their lengths.

`max_dist` is returned as soon as the running count reaches it.
"""
function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString;
    max_dist = typemax(Int))
    # characters without a counterpart in the other string all count as differences
    n_diff = abs(length(s2) - length(s1))
    if n_diff >= max_dist
        return max_dist
    end
    # `zip` stops at the end of the shorter string
    for (c1, c2) in zip(s1, s2)
        if c1 != c2
            n_diff += 1
        end
        if n_diff >= max_dist
            return max_dist
        end
    end
    return n_diff
end

##############################################################################
##
## Jaro
##
##############################################################################
"""
    Jaro()

Creates the Jaro metric

The Jaro distance is defined as

``1 - (m / |s1| + m / |s2| + (m - t) / m) / 3``

where ``m`` is the number of matching characters,
``t`` is half the number of transpositions, and
``|s1|``, ``|s2|`` are the lengths of the two strings.

The distance between two empty strings is defined as `0.0`, and the distance
between two strings without any matching character is `1.0`.
"""
struct Jaro <: SemiMetric end

## http://alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html
"""
    evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString; max_dist = Inf)

Return the Jaro distance between `s1` and `s2`, capped at `max_dist`.

Two empty strings are at distance `0.0`. When no character matches, the result is
`min(1.0, max_dist)`. `max_dist` is returned right away when the lengths of the
strings alone show that the distance cannot be smaller.
"""
function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString;
    max_dist = Inf)
    # NOTE(review): the code below assumes that `reorder` makes `s2` the longer
    # string (`len2 >= len1`) -- confirm against its definition.
    s2, len2, s1, len1 = reorder(s1, s2)
    # if both are empty, m = 0 so the distance should be 1.0 according to wikipedia.
    # This line makes it 0.0 instead.
    len2 == 0 && return 0.0
    # Time-Efficient Execution of Bounded Jaro-Winkler Distances Equation (4)
    1 - (2 / 3 + len1 / (3 * len2)) >= max_dist && return max_dist
    # half-width of the matching window (unrelated to the `max_dist` keyword)
    maxdist = max(0, div(len2, 2) - 1)
    # flag[i2] becomes true once the i2-th character of s2 has been matched
    flag = fill(false, len2)
    prevstate1 = firstindex(s1)
    # i1_match[j] is the iteration state of s1 that yields its j-th matched character
    i1_match = fill(prevstate1, len1)
    #  m counts number matching characters
    m = 0 
    i1 = 1
    i2 = 1
    x1 = iterate(s1)
    x2 = iterate(s2)
    while x1 !== nothing
        ch1, state1 = x1
        # move the start of the window forward once it is more than `maxdist` behind i1
        if i2 <= i1 - maxdist - 1
            ch2, state2 = x2
            i2 += 1
            x2 = iterate(s2, state2)
        end 
        # scan the window of s2 for the first unmatched character equal to ch1
        i2curr = i2
        x2curr = x2
        while x2curr !== nothing
            (i2curr > i1 + maxdist) && break
            ch2, state2 = x2curr
            if (ch1 == ch2) & !flag[i2curr] 
                m += 1
                flag[i2curr] = true
                # prevstate1 is the state that was passed to `iterate` to obtain ch1
                i1_match[m] = prevstate1
                break
            end
            x2curr = iterate(s2, state2) 
            i2curr += 1
        end
        x1 = iterate(s1, state1)
        i1 += 1
        prevstate1 = state1
    end
    m == 0 && return min(1.0, max_dist)
    # t counts the matched characters that appear in a different order in s1 and s2
    # (it is halved in the formula below)
    t = 0
    i1 = 0
    i2 = 0
    for ch2 in s2
        i2 += 1
        if flag[i2]
            i1 += 1
            # compare the i1-th matched character of s2 with the i1-th matched one of s1
            t += ch2 != iterate(s1, i1_match[i1])[1]
        end
    end
    current = 1.0 - (m / len1 + m / len2 + (m - t/2) / m) / 3.0
    return min(current, max_dist)
end

##############################################################################
##
## Levenshtein
##
##############################################################################
"""
    Levenshtein()

Creates the Levenshtein metric

The Levenshtein distance is the minimum number of operations (consisting of
insertions, deletions, substitutions of a single character) required to change
one string into the other.

`evaluate` accepts a `max_dist` keyword; the returned distance never exceeds it.
"""
struct Levenshtein <: SemiMetric end

## Source: http://blog.softwx.net/2014/12/optimizing-levenshtein-algorithm-in-c.html
"""
    evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString; max_dist = typemax(Int))

Return the Levenshtein distance between `s1` and `s2`, capped at `max_dist`.

Only one row of the dynamic-programming matrix (`v0`) is kept in memory. The
computation stops early and returns `max_dist` once the distance is known to
reach it.
"""
function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString;
    max_dist = typemax(Int))
    # NOTE(review): the code below assumes that `reorder` makes `s2` the longer
    # string (`len2 >= len1`) -- confirm against its definition.
    s2, len2, s1, len1 = reorder(s1, s2)
    len2 - len1 >= max_dist && return max_dist
    # prefix common to both strings can be ignored
    # NOTE(review): `k` is presumably the number of characters in the common prefix,
    # and `x1` / `x2start` the `iterate` results just past it -- confirm against
    # `remove_prefix`.
    k, x1, x2start = remove_prefix(s1, s2)
    # nothing left in s1 once the prefix is dropped
    x1 === nothing && return len2 - k
    # distance initialized to first row of matrix
    # => distance between "" and s2[1:i]
    v0 = collect(1:(len2 - k))
    current = 0
    i1 = 1
    while x1 !== nothing
        ch1, state1 = x1
        left = i1 - 1
        current = i1 - 1
        # min_dist ends up no larger than the smallest entry of the previous row
        min_dist = i1 - 2 
        i2 = 1
        x2 = x2start
        while x2 !== nothing
            ch2, state2 = x2
            #  update: `above` is the cell just computed in this row, `current` starts
            #  as the diagonal cell, `left` is the cell of the previous row
            above, current, left = current, left, v0[i2]
            if ch1 != ch2
                # substitution
                current = min(current + 1, above + 1, left + 1)
            end
            min_dist = min(min_dist, left)
            v0[i2] = current
            x2 = iterate(s2, state2)
            i2 += 1
        end
        min_dist >= max_dist && return max_dist
        x1 = iterate(s1, state1)
        i1 += 1
    end
    return min(current, max_dist)
end

##############################################################################
##
## Damerau Levenshtein
##
##############################################################################
"""
    DamerauLevenshtein()

Creates the DamerauLevenshtein metric

The DamerauLevenshtein distance is the minimum number of operations (consisting
of insertions, deletions or substitutions of a single character, or
transposition of two adjacent characters) required to change one string into
the other.

`evaluate` accepts a `max_dist` keyword; `max_dist` is returned as soon as the
distance is known to reach it.
"""
struct DamerauLevenshtein <: SemiMetric end

## http://blog.softwx.net/2015/01/optimizing-damerau-levenshtein_15.html
"""
    evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString; max_dist = typemax(Int))

Return the Damerau-Levenshtein distance between `s1` and `s2`, or `max_dist` as
soon as the distance is known to reach it.

Two working rows are kept: `v0` holds the current row of costs and `v2` holds the
diagonal costs needed to price a transposition of two adjacent characters.
"""
function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString;
    max_dist = typemax(Int))
    # NOTE(review): the code below assumes that `reorder` makes `s2` the longer
    # string (`len2 >= len1`) -- confirm against its definition.
    s2, len2, s1, len1 = reorder(s1, s2)
    len2 - len1 >= max_dist && return max_dist
    # prefix common to both strings can be ignored
    # NOTE(review): `k` is presumably the number of characters in the common prefix,
    # and `x1` / `x2start` the `iterate` results just past it -- confirm against
    # `remove_prefix`.
    k, x1, x2start = remove_prefix(s1, s2)
    # nothing left in s1 once the prefix is dropped
    x1 === nothing && return len2 - k
    v0 = collect(1:(len2 - k))
    # zero-initialized so that the reads made while processing the first row are
    # deterministic (those values are never used in the result)
    v2 = zeros(Int, length(v0))
    i1 = 1
    current = i1
    prevch1, = x1
    while x1 !== nothing
        ch1, state1 = x1
        left = (i1 - 1) 
        current = i1 
        nextTransCost = 0
        prevch2, = x2start
        x2 = x2start
        i2 = 1
        while x2 !== nothing
            ch2, state2 = x2
            above = current
            # v2 still holds the diagonal costs stored while processing the previous row
            thisTransCost = nextTransCost
            nextTransCost = v2[i2]
            # cost of diagonal (substitution)
            v2[i2] = current = left
            # left now equals current cost (which will be diagonal at next iteration)
            left = v0[i2]
            if ch1 != ch2
                # insertion
                if left < current
                    current = left
                end
                # deletion
                if above < current
                    current = above
                end
                current += 1
                # transposition of two adjacent characters
                if (i1 != 1) & (i2 != 1) & (ch1 == prevch2) & (prevch1 == ch2)
                    thisTransCost += 1
                    if thisTransCost < current
                        current = thisTransCost
                    end
                end
            end
            v0[i2] = current
            x2 = iterate(s2, state2)
            i2 += 1
            prevch2 = ch2
        end
        # early exit based on the entry of the current row in column i1 + (len2 - len1)
        (v0[i1 + len2 - len1] >= max_dist) && return max_dist
        x1 = iterate(s1, state1)
        i1 += 1
        prevch1 = ch1
    end
    return current
end


##############################################################################
##
## Ratcliff/Obershelp
##
##############################################################################
"""
    RatcliffObershelp()

Creates the RatcliffObershelp metric

The distance between two strings is defined as one minus twice the number of
matching characters divided by the total number of characters in the two
strings. Matching characters are those in the longest common substring plus,
recursively, matching characters in the unmatched regions on either side of the
longest common substring.

The distance between two empty strings is zero.
"""
struct RatcliffObershelp <: PreMetric end

"""
    evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString; max_dist = Inf)

Return the Ratcliff/Obershelp distance between `s1` and `s2`, capped at `max_dist`.

The distance is `1 - 2 * n_matched / (length(s1) + length(s2))`, where `n_matched`
is the total length of the blocks returned by `matching_blocks`. Two empty strings
are at distance `0.0`.
"""
function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString; max_dist = Inf)
    len1, len2 = length(s1), length(s2)
    # two empty strings: avoid dividing by zero, and return a float like the other path
    len1 + len2 == 0 && return 0.0
    # the last entry of every block is its length; `init` covers the case of no block
    n_matched = mapreduce(last, +, matching_blocks(s1, s2); init = 0)
    return min(1.0 - 2 * n_matched / (len1 + len2), max_dist)
end

"""
    matching_blocks(s1::AbstractString, s2::AbstractString)

Return the set of matching blocks of `s1` and `s2`, each stored as a tuple
`(position in s1, position in s2, length)`.
"""
function matching_blocks(s1::AbstractString, s2::AbstractString)
    blocks = Set{Tuple{Int, Int, Int}}()
    # both strings are searched in full, so positions are reported from offset 1
    return matching_blocks!(blocks, s1, s2, length(s1), length(s2), 1, 1)
end

"""
    matching_blocks!(x, s1, s2, len1, len2, start1, start2)

Add to `x` the matching blocks of `s1` and `s2`, then return `x`.

`len1` and `len2` are the lengths of `s1` and `s2`. `start1` and `start2` are the
offsets added to the positions found in `s1` and `s2` before they are stored, so
that blocks found in sub-regions are reported relative to the outermost strings.
"""
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer, start1::Integer, start2::Integer)
    # NOTE(review): the result is used as (position in s1, position in s2, length),
    # with positions counted in characters -- confirm against `longest_common_substring`.
    lcs = longest_common_substring(s1, s2, len1 , len2)
    pos1, pos2, n = lcs[1], lcs[2], lcs[3]
    # nothing in common: this region contributes no block
    if n == 0
        return x
    end
    # record the block, shifted by the offsets of the region being searched
    push!(x, (pos1 + start1 - 1, pos2 + start2 - 1, n))
    # search the parts of both strings that precede the block
    head1 = SubString(s1, 1, nextind(s1, 0, pos1 - 1))
    head2 = SubString(s2, 1, nextind(s2, 0, pos2 - 1))
    matching_blocks!(x, head1, head2, pos1 - 1, pos2 - 1, start1, start2)
    # search the parts of both strings that follow the block
    next1 = pos1 + n
    next2 = pos2 + n
    tail1 = SubString(s1, nextind(s1, 0, next1), lastindex(s1))
    tail2 = SubString(s2, nextind(s2, 0, next2), lastindex(s2))
    matching_blocks!(x, tail1, tail2, len1 - next1 + 1, len2 - next2 + 1, start1 + next1 - 1, start2 + next2 - 1)
    return x
end
add winkler and normalized 2015-10-25 16:23:46 +01:00
add grams 2015-10-23 16:12:51 +02:00			`##############################################################################`
			`##`
			`## Hamming`
			`##`
			`##############################################################################`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`function evaluate(dist::Hamming, s1::AbstractString, s2::AbstractString;`
			`max_dist = typemax(Int))`
clean 2019-08-17 17:40:26 +02:00			`current = abs(length(s2) - length(s1))`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`current >= max_dist && return max_dist`
store string length 2015-11-03 19:07:17 +01:00			`for (ch1, ch2) in zip(s1, s2)`
clean 2019-08-17 17:40:26 +02:00			`current += ch1 != ch2`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`current >= max_dist && return max_dist`
add grams 2015-10-23 16:12:51 +02:00			`end`
clean 2019-08-17 17:40:26 +02:00			`return current`
add grams 2015-10-23 16:12:51 +02:00			`end`

clean 2019-08-18 18:52:37 +02:00			`##############################################################################`
			`##`
			`## Jaro`
			`##`
			`##############################################################################`
			`"""`
			`Jaro()`

			`Creates the Jaro metric`

			`The Jaro distance is defined as`


			``1 - (m / \|s1\| + m / \|s2\| + (m - t) / m) / 3``

			where ``m`` is the number of matching characters and
			``t`` is half the number of transpositions.
			`"""`
			`struct Jaro <: SemiMetric end`

			`## http://alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`function evaluate(dist::Jaro, s1::AbstractString, s2::AbstractString;`
			`max_dist = Inf)`
clean 2019-08-18 18:52:37 +02:00			`s2, len2, s1, len1 = reorder(s1, s2)`
			`# if both are empty, m = 0 so should be 1.0 according to wikipedia. Add this line so that not the case`
			`len2 == 0 && return 0.0`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`# Time-Efficient Execution of Bounded Jaro-Winkler Distances Equation (4)`
			`1 - (2 / 3 + len1 / (3 * len2)) >= max_dist && return max_dist`
clean 2019-08-18 18:52:37 +02:00			`maxdist = max(0, div(len2, 2) - 1)`
			`flag = fill(false, len2)`
			`prevstate1 = firstindex(s1)`
			`i1_match = prevstate1 * ones(Int, len1)`
			`# m counts number matching characters`
			`m = 0`
			`i1 = 1`
			`i2 = 1`
			`x1 = iterate(s1)`
			`x2 = iterate(s2)`
			`while x1 !== nothing`
			`ch1, state1 = x1`
			`if i2 <= i1 - maxdist - 1`
			`ch2, state2 = x2`
			`i2 += 1`
			`x2 = iterate(s2, state2)`
			`end`
			`i2curr = i2`
			`x2curr = x2`
			`while x2curr !== nothing`
			`(i2curr > i1 + maxdist) && break`
			`ch2, state2 = x2curr`
			`if (ch1 == ch2) & !flag[i2curr]`
			`m += 1`
			`flag[i2curr] = true`
			`i1_match[m] = prevstate1`
			`break`
			`end`
			`x2curr = iterate(s2, state2)`
			`i2curr += 1`
			`end`
			`x1 = iterate(s1, state1)`
			`i1 += 1`
			`prevstate1 = state1`
			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`m == 0 && return min(1.0, max_dist)`
clean 2019-08-18 18:52:37 +02:00			`# t counts number of transpositions`
			`t = 0`
			`i1 = 0`
			`i2 = 0`
			`for ch2 in s2`
			`i2 += 1`
			`if flag[i2]`
			`i1 += 1`
			`t += ch2 != iterate(s1, i1_match[i1])[1]`
			`end`
			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`current = 1.0 - (m / len1 + m / len2 + (m - t/2) / m) / 3.0`
			`return min(current, max_dist)`
clean 2019-08-18 18:52:37 +02:00			`end`

add grams 2015-10-23 16:12:51 +02:00			`##############################################################################`
			`##`
refractor 2015-11-02 18:54:47 +01:00			`## Levenshtein`
add grams 2015-10-23 16:12:51 +02:00			`##`
			`##############################################################################`
add comments 2019-08-18 01:45:31 +02:00			`"""`
			`Levenshtein()`

			`Creates the Levenshtein metric`
add winkler and normalized 2015-10-25 16:23:46 +01:00
add comments 2019-08-18 01:45:31 +02:00			`The Levenshtein distance is the minimum number of operations (consisting of insertions, deletions, substitutions of a single character) required to change one string into the other.`
			`"""`
update to 0.6 2017-05-12 23:41:56 +02:00			`struct Levenshtein <: SemiMetric end`
add grams 2015-10-23 16:12:51 +02:00
clean 2019-08-18 18:52:37 +02:00			`## Source: http://blog.softwx.net/2014/12/optimizing-levenshtein-algorithm-in-c.html`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`function evaluate(dist::Levenshtein, s1::AbstractString, s2::AbstractString;`
			`max_dist = typemax(Int))`
simplify len + correct Jaro 2017-08-05 20:45:19 +02:00			`s2, len2, s1, len1 = reorder(s1, s2)`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`len2 - len1 >= max_dist && return max_dist`
clean 2019-08-17 17:40:26 +02:00			`# prefix common to both strings can be ignored`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`k, x1, x2start = remove_prefix(s1, s2)`
update to 0.7 2018-07-04 20:02:50 +02:00			`(x1 == nothing) && return len2 - k`
add unicode support 2015-10-24 18:45:24 +02:00			`# distance initialized to first row of matrix`
			`# => distance between "" and s2[1:i}`
simplify 2018-07-04 21:26:24 +02:00			`v0 = collect(1:(len2 - k))`
update to 0.7 2018-07-04 20:02:50 +02:00			`current = 0`
simplify logic 2018-07-04 21:47:11 +02:00			`i1 = 1`
update with dictionary + faster 2018-07-04 23:27:40 +02:00			`while x1 !== nothing`
update to 0.7 2018-07-04 20:02:50 +02:00			`ch1, state1 = x1`
clean 2019-08-17 17:40:26 +02:00			`left = i1 - 1`
			`current = i1 - 1`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`min_dist = i1 - 2`
simplify logic 2018-07-04 21:47:11 +02:00			`i2 = 1`
simplify logic 2018-07-04 21:04:06 +02:00			`x2 = x2start`
update with dictionary + faster 2018-07-04 23:27:40 +02:00			`while x2 !== nothing`
update to 0.7 2018-07-04 20:02:50 +02:00			`ch2, state2 = x2`
add unicode support 2015-10-24 18:45:24 +02:00			`# update`
			`above, current, left = current, left, v0[i2]`
			`if ch1 != ch2`
			`# substitution`
clean 2019-08-17 17:40:26 +02:00			`current = min(current + 1, above + 1, left + 1)`
add grams 2015-10-23 16:12:51 +02:00			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`min_dist = min(min_dist, left)`
add unicode support 2015-10-24 18:45:24 +02:00			`v0[i2] = current`
update to 0.7 2018-07-04 20:02:50 +02:00			`x2 = iterate(s2, state2)`
simplify logic 2018-07-04 21:47:11 +02:00			`i2 += 1`
add grams 2015-10-23 16:12:51 +02:00			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`min_dist >= max_dist && return max_dist`
update to 0.7 2018-07-04 20:02:50 +02:00			`x1 = iterate(s1, state1)`
simplify logic 2018-07-04 21:47:11 +02:00			`i1 += 1`
add grams 2015-10-23 16:12:51 +02:00			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`return min(current, max_dist)`
add grams 2015-10-23 16:12:51 +02:00			`end`

refractor 2015-11-02 18:54:47 +01:00			`##############################################################################`
			`##`
			`## Damerau Levenshtein`
			`##`
			`##############################################################################`
add comments 2019-08-18 01:45:31 +02:00			`"""`
			`DamerauLevenshtein()`
refractor 2015-11-02 18:54:47 +01:00
add comments 2019-08-18 01:45:31 +02:00			`Creates the DamerauLevenshtein metric`

			`The DamerauLevenshtein distance is the minimum number of operations (consisting of insertions, deletions or substitutions of a single character, or transposition of two adjacent characters) required to change one string into the other.`
			`"""`
update to 0.6 2017-05-12 23:41:56 +02:00			`struct DamerauLevenshtein <: SemiMetric end`
add grams 2015-10-23 16:12:51 +02:00
clean 2019-08-18 18:52:37 +02:00			`## http://blog.softwx.net/2015/01/optimizing-damerau-levenshtein_15.html`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`function evaluate(dist::DamerauLevenshtein, s1::AbstractString, s2::AbstractString;`
			`max_dist = typemax(Int))`
update 0.7 2018-07-04 18:33:13 +02:00			`s2, len2, s1, len1 = reorder(s1, s2)`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`len2 - len1 >= max_dist && return max_dist`
add winkler and normalized 2015-10-25 16:23:46 +01:00			`# prefix common to both strings can be ignored`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`k, x1, x2start = remove_prefix(s1, s2)`
update to 0.7 2018-07-04 20:02:50 +02:00			`(x1 == nothing) && return len2 - k`
simplify 2018-07-04 21:26:24 +02:00			`v0 = collect(1:(len2 - k))`
			`v2 = similar(v0)`
simplify logic 2018-07-04 21:47:11 +02:00			`i1 = 1`
			`current = i1`
simplify logic 2018-07-04 21:04:06 +02:00			`prevch1, = x1`
clean 2019-08-17 17:40:26 +02:00			`while x1 !== nothing`
update to 0.7 2018-07-04 20:02:50 +02:00			`ch1, state1 = x1`
add unicode support 2015-10-24 18:45:24 +02:00			`left = (i1 - 1)`
			`current = i1`
add grams 2015-10-23 16:12:51 +02:00			`nextTransCost = 0`
simplify logic 2018-07-04 21:04:06 +02:00			`prevch2, = x2start`
			`x2 = x2start`
simplify logic 2018-07-04 21:47:11 +02:00			`i2 = 1`
clean 2019-08-17 17:40:26 +02:00			`while x2 !== nothing`
update to 0.7 2018-07-04 20:02:50 +02:00			`ch2, state2 = x2`
add grams 2015-10-23 16:12:51 +02:00			`above = current`
			`thisTransCost = nextTransCost`
add unicode support 2015-10-24 18:45:24 +02:00			`nextTransCost = v2[i2]`
			`# cost of diagonal (substitution)`
			`v2[i2] = current = left`
			`# left now equals current cost (which will be diagonal at next iteration)`
			`left = v0[i2]`
add grams 2015-10-23 16:12:51 +02:00			`if ch1 != ch2`
add unicode support 2015-10-24 18:45:24 +02:00			`# insertion`
add grams 2015-10-23 16:12:51 +02:00			`if left < current`
			`current = left`
			`end`
add unicode support 2015-10-24 18:45:24 +02:00			`# deletion`
add grams 2015-10-23 16:12:51 +02:00			`if above < current`
			`current = above`
			`end`
			`current += 1`
clean 2019-08-17 17:40:26 +02:00			`if (i1 != 1) & (i2 != 1) & (ch1 == prevch2) & (prevch1 == ch2)`
add grams 2015-10-23 16:12:51 +02:00			`thisTransCost += 1`
			`if thisTransCost < current`
			`current = thisTransCost`
			`end`
			`end`
			`end`
add unicode support 2015-10-24 18:45:24 +02:00			`v0[i2] = current`
update to 0.7 2018-07-04 20:02:50 +02:00			`x2 = iterate(s2, state2)`
simplify logic 2018-07-04 21:47:11 +02:00			`i2 += 1`
simplify logic 2018-07-04 21:04:06 +02:00			`prevch2 = ch2`
add grams 2015-10-23 16:12:51 +02:00			`end`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`(v0[i1 + len2 - len1] >= max_dist) && return max_dist`
update to 0.7 2018-07-04 20:02:50 +02:00			`x1 = iterate(s1, state1)`
simplify logic 2018-07-04 21:47:11 +02:00			`i1 += 1`
simplify logic 2018-07-04 21:04:06 +02:00			`prevch1 = ch1`
add grams 2015-10-23 16:12:51 +02:00			`end`
			`return current`
			`end`

simplify Radclikff 2019-08-17 18:57:35 +02:00
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00

simplify Radclikff 2019-08-17 18:57:35 +02:00			`##############################################################################`
			`##`
			`## Ratcliff/Obershelp`
			`##`
			`##############################################################################`
add comments 2019-08-18 01:45:31 +02:00			`"""`
			`RatcliffObershelp()`

			`Creates the RatcliffObershelp metric`
simplify Radclikff 2019-08-17 18:57:35 +02:00
add comments 2019-08-18 01:45:31 +02:00			`The distance between two strings is defined as one minus the number of matching characters divided by the total number of characters in the two strings. Matching characters are those in the longest common subsequence plus, recursively, matching characters in the unmatched region on either side of the longest common subsequence.`
			`"""`
simplify Radclikff 2019-08-17 18:57:35 +02:00			`struct RatcliffObershelp <: PreMetric end`

add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`function evaluate(dist::RatcliffObershelp, s1::AbstractString, s2::AbstractString; max_dist = Inf)`
add 2019-08-17 22:12:41 +02:00			`n_matched = sum(last.(matching_blocks(s1, s2)))`
			`len1, len2 = length(s1), length(s2)`
add maximum distance for Jaro, Levenshtein, DamerauLevenshtein 2019-08-19 19:12:55 +02:00			`len1 + len2 == 0 ? 0 : min(1.0 - 2 * n_matched / (len1 + len2), max_dist)`
simplify Radclikff 2019-08-17 18:57:35 +02:00			`end`

			`function matching_blocks(s1::AbstractString, s2::AbstractString)`
			`matching_blocks!(Set{Tuple{Int, Int, Int}}(), s1, s2, length(s1), length(s2), 1, 1)`
			`end`

			`function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, s1::AbstractString, s2::AbstractString, len1::Integer, len2::Integer, start1::Integer, start2::Integer)`
			`a = longest_common_substring(s1, s2, len1 , len2)`
Update edit.jl 2019-08-17 19:18:13 +02:00			`# exit if there is no common substring`
			`a[3] == 0 && return x`
			`# add the info of the common to the existing set`
			`push!(x, (a[1] + start1 - 1, a[2] + start2 - 1, a[3]))`
			`# add the longest common substring that happens before`
			`s1before = SubString(s1, 1, nextind(s1, 0, a[1] - 1))`
			`s2before = SubString(s2, 1, nextind(s2, 0, a[2] - 1))`
			`matching_blocks!(x, s1before, s2before, a[1] - 1, a[2] - 1, start1, start2)`
			`# add the longest common substring that happens after`
			`s1after = SubString(s1, nextind(s1, 0, a[1] + a[3]), lastindex(s1))`
			`s2after = SubString(s2, nextind(s2, 0, a[2] + a[3]), lastindex(s2))`
			`matching_blocks!(x, s1after, s2after, len1 - (a[1] + a[3]) + 1, len2 - (a[2] + a[3]) + 1, start1 + a[1] + a[3] - 1, start2 + a[2] + a[3] - 1)`
simplify Radclikff 2019-08-17 18:57:35 +02:00			`return x`
			`end`