cosmetic changes: word variables, avoiding short circuits, fewer returns
This commit is contained in:
parent
3f6dfb22d1
commit
673e30ce1f
363
src/latex.jl
363
src/latex.jl
|
@ -1,7 +1,7 @@
|
||||||
# conversion of LaTeX directives to plain text, markdown, etc.
|
# conversion of LaTeX directives to plain text, markdown, etc.
|
||||||
#
|
#
|
||||||
# The basic idea is that we search for `\foo{arg}`, `{\foo arg}`,
|
# The basic idea is that we search for `\foo{argument}`, `{\foo argument}`,
|
||||||
# or `{\foo{arg}}`, and look up `foo` in a dictionary of substitutions
|
# or `{\foo{argument}}`, and look up `foo` in a dictionary of substitutions
|
||||||
# like `\textit` -> `*#1*` where #1 is where the (first) argument is
|
# like `\textit` -> `*#1*` where #1 is where the (first) argument is
|
||||||
# substituted. Then we have separate dictionary entries for text/plain,
|
# substituted. Then we have separate dictionary entries for text/plain,
|
||||||
# text/markdown, etcetera.
|
# text/markdown, etcetera.
|
||||||
|
@ -10,158 +10,185 @@
|
||||||
# parsing LaTeX directives:
|
# parsing LaTeX directives:
|
||||||
|
|
||||||
const BACKSLASH = UInt8('\\')
|
const BACKSLASH = UInt8('\\')
|
||||||
const BRACEOPEN = UInt8('{')
|
const BRACE_OPEN = UInt8('{')
|
||||||
const BRACECLOSE = UInt8('}')
|
const BRACE_CLOSE = UInt8('}')
|
||||||
const SPACE = UInt8(' ')
|
const SPACE = UInt8(' ')
|
||||||
const DOLLAR = UInt8('$')
|
const DOLLAR = UInt8('$')
|
||||||
const CARET = UInt8('^')
|
const CARET = UInt8('^')
|
||||||
const UNDERSCORE = UInt8('_')
|
const UNDERSCORE = UInt8('_')
|
||||||
isalpha8(x::UInt8) = UInt8('a') ≤ x ≤ UInt8('z') || UInt8('A') ≤ x ≤ UInt8('Z')
|
is_letter(x::UInt8) = UInt8('a') ≤ x ≤ UInt8('z') || UInt8('A') ≤ x ≤ UInt8('Z')
|
||||||
isalnum8(x::UInt8) = UInt8('0') ≤ x ≤ UInt8('9') || isalpha8(x)
|
is_alphanumeric(x::UInt8) = UInt8('0') ≤ x ≤ UInt8('9') || is_letter(x)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
search_latexdirective(string, istart=1, inbrace=false)
|
search_latex_directive(astring, start_position = 1)
|
||||||
|
|
||||||
Search for a LaTeX directive \\directive{arg} or similar in `string`, returning
|
Search for a LaTeX directive \\directive{argument} or similar in `astring`, returning
|
||||||
`(ds, de, ae)` such that `string[ds:de]` gives `\\directive` and `string[de+1:ae]`
|
`(directive_start, directive_end, argument_end)` such that `astring[directive_start:directive_end]` gives `\\directive` and `astring[directive_end+1:argument_end]`
|
||||||
gives `{arg}`. Use [`striparg`](@ref) to remove surrounding braces and whitespace
|
gives `{argument}`. Use [`strip_argument`](@ref) to remove surrounding braces and whitespace
|
||||||
from the `arg`.
|
from the `argument`.
|
||||||
"""
|
"""
|
||||||
function search_latexdirective(s::Union{String,SubString{String}}, istart::Int=1)
|
function search_latex_directive(astring, start_position = 1)
|
||||||
e = sizeof(s)
|
string_length = sizeof(astring)
|
||||||
0 < istart ≤ e || return 0,0,0
|
if !(0 < start_position ≤ string_length)
|
||||||
p = Vector{UInt8}(s)
|
0, 0, 0
|
||||||
i = istart
|
else
|
||||||
allspaces=true
|
character_vector = Vector{UInt8}(astring)
|
||||||
|
index = start_position
|
||||||
|
all_spaces = true
|
||||||
|
|
||||||
# find \foo directive or {...}:
|
# find \foo directive or {...}:
|
||||||
c = UInt8(0)
|
character = UInt8(0)
|
||||||
while i ≤ e
|
while index ≤ string_length
|
||||||
c = p[i]
|
character = character_vector[index]
|
||||||
(c == BACKSLASH || c == BRACEOPEN || c == CARET || c == UNDERSCORE) && break
|
if (character == BACKSLASH || character == BRACE_OPEN || character == CARET || character == UNDERSCORE)
|
||||||
c != SPACE && (allspaces = false)
|
break
|
||||||
i += 1
|
end
|
||||||
end
|
if character != SPACE
|
||||||
if i ≤ e && c != BRACEOPEN
|
all_spaces = false
|
||||||
directive_start = i
|
end
|
||||||
if c == BACKSLASH
|
index += 1
|
||||||
i += 2
|
end
|
||||||
i-1 > e && return 0,0,0
|
if index ≤ string_length && character != BRACE_OPEN
|
||||||
if isalpha8(p[i-1])
|
directive_start = index
|
||||||
while i ≤ e && isalpha8(p[i])
|
if character == BACKSLASH
|
||||||
i += 1
|
index += 2
|
||||||
|
if index - 1 > string_length
|
||||||
|
return 0,0,0
|
||||||
end
|
end
|
||||||
end
|
if is_letter(character_vector[index - 1])
|
||||||
directive_end = i-1
|
while index ≤ string_length && is_letter(character_vector[index])
|
||||||
else
|
index += 1
|
||||||
directive_end = directive_start # ^ or _
|
end
|
||||||
i += 1
|
end
|
||||||
end
|
directive_end = index - 1
|
||||||
|
else
|
||||||
# look for optional opening brace
|
directive_end = directive_start # ^ or _
|
||||||
while i ≤ e && p[i] == SPACE
|
index += 1
|
||||||
i += 1
|
end
|
||||||
end
|
|
||||||
i > e && return directive_start, directive_end, e
|
# look for optional opening brace
|
||||||
inbrace = p[i] == BRACEOPEN
|
while index ≤ string_length && character_vector[index] == SPACE
|
||||||
if !inbrace
|
index += 1
|
||||||
# search backwards from \foo to look for { \foo ...}
|
end
|
||||||
j = directive_start - 1
|
if index > string_length
|
||||||
while j ≥ istart && p[j] == SPACE
|
return directive_start, directive_end, string_length
|
||||||
j -= 1
|
end
|
||||||
end
|
in_braces = character_vector[index] == BRACE_OPEN
|
||||||
if j < istart || p[j] != BRACEOPEN
|
if !in_braces
|
||||||
if p[i] == BACKSLASH
|
# search backwards from \foo to look for { \foo ...}
|
||||||
# argument is another latex directive
|
backwards_index = directive_start - 1
|
||||||
ds,de,ae = search_latexdirective(s, i)
|
while backwards_index ≥ start_position && character_vector[backwards_index] == SPACE
|
||||||
return directive_start, directive_end, ae
|
backwards_index -= 1
|
||||||
elseif c != BACKSLASH
|
end
|
||||||
# in an equation, token is a single char
|
if backwards_index < start_position || character_vector[backwards_index] != BRACE_OPEN
|
||||||
return directive_start, directive_end, i
|
if character_vector[index] == BACKSLASH
|
||||||
elseif allspaces
|
# argument is another latex directive
|
||||||
# if `\directive ...` was preceded only
|
inner_start_position, inner_directive_end, inner_argument_end = search_latex_directive(astring, index)
|
||||||
# by whitespace, then assume arguments
|
return directive_start, directive_end, inner_argument_end
|
||||||
# extend to the end of the string. This
|
elseif character != BACKSLASH
|
||||||
# happens when we recurse on `{\directive ...}`.
|
# in an equation, token is a single char
|
||||||
return directive_start, directive_end, e
|
return directive_start, directive_end, index
|
||||||
else
|
elseif all_spaces
|
||||||
# argument is not in braces … get next token
|
# if `\directive ...` was preceded only
|
||||||
while i ≤ e && isalnum8(p[i])
|
# by whitespace, then assume arguments
|
||||||
i += 1
|
# extend to the end of the string. This
|
||||||
|
# happens when we recurse on `{\directive ...}`.
|
||||||
|
return directive_start, directive_end, string_length
|
||||||
|
else
|
||||||
|
# argument is not in braces … get next token
|
||||||
|
while index ≤ string_length && is_alphanumeric(character_vector[index])
|
||||||
|
index += 1
|
||||||
|
end
|
||||||
|
return directive_start, directive_end, index - 1
|
||||||
end
|
end
|
||||||
return directive_start, directive_end, i-1
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
index += 1
|
||||||
|
elseif index > string_length
|
||||||
|
return 0, 0, 0
|
||||||
|
else # { ... }
|
||||||
|
directive_start = index
|
||||||
|
directive_end = index - 1
|
||||||
|
in_braces = true
|
||||||
|
index += 1
|
||||||
end
|
end
|
||||||
i += 1
|
|
||||||
elseif i > e
|
|
||||||
return 0,0,0
|
|
||||||
else # { ... }
|
|
||||||
directive_start = i
|
|
||||||
directive_end = i - 1
|
|
||||||
inbrace = true
|
|
||||||
i += 1
|
|
||||||
end
|
|
||||||
|
|
||||||
# search for end of argument (closing brace)
|
# search for end of argument (closing brace)
|
||||||
nbraces = 1
|
number_of_braces = 1
|
||||||
while i ≤ e
|
while index ≤ string_length
|
||||||
c = p[i]
|
character = character_vector[index]
|
||||||
if c == BRACEOPEN
|
if character == BRACE_OPEN
|
||||||
nbraces += 1
|
number_of_braces += 1
|
||||||
elseif c == BRACECLOSE
|
elseif character == BRACE_CLOSE
|
||||||
nbraces -= 1
|
number_of_braces -= 1
|
||||||
if nbraces == 0
|
if number_of_braces == 0
|
||||||
return directive_start, directive_end, inbrace ? i : i-1
|
argument_end = if in_braces
|
||||||
|
index
|
||||||
|
else
|
||||||
|
index - 1
|
||||||
|
end
|
||||||
|
return directive_start, directive_end, argument_end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
index += 1
|
||||||
end
|
end
|
||||||
i += 1
|
directive_start, directive_end, string_length
|
||||||
end
|
end
|
||||||
return directive_start, directive_end, e
|
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
striparg(s, argstart=start(s), argend=endof(s))
|
strip_argument(astring, start_position = start(astring), end_position = endof(astring))
|
||||||
|
|
||||||
Return the substring of `s` corresponding to the argument from `argstart:argend`, stripping
|
Return the substring of `astring` corresponding to the argument from `start_position:end_position`, stripping
|
||||||
leading/trailing whitespace and braces.
|
leading/trailing whitespace and braces.
|
||||||
"""
|
"""
|
||||||
function striparg(s::Union{String,SubString{String}}, argstart::Int=start(s), argend::Int=endof(s))
|
function strip_argument(astring, start_position = start(astring), end_position = endof(astring))
|
||||||
argstart > argend && return SubString(s, 1, 0)
|
if start_position > end_position
|
||||||
e = endof(s)
|
SubString(astring, 1, 0)
|
||||||
(1 ≤ argstart ≤ e && 1 ≤ argend ≤ e) || throw(BoundsError())
|
else
|
||||||
|
string_length = endof(astring)
|
||||||
p = Vector{UInt8}(s)
|
if !(1 ≤ start_position ≤ string_length && 1 ≤ end_position ≤ string_length)
|
||||||
if p[argend] == BRACECLOSE
|
throw(BoundsError())
|
||||||
argend -= 1 # omit brace
|
else
|
||||||
while argstart ≤ argend && p[argstart] != BRACEOPEN
|
character_vector = Vector{UInt8}(astring)
|
||||||
argstart += 1
|
if character_vector[end_position] == BRACE_CLOSE
|
||||||
|
end_position -= 1 # omit brace
|
||||||
|
while start_position ≤ end_position && character_vector[start_position] != BRACE_OPEN
|
||||||
|
start_position += 1
|
||||||
|
end
|
||||||
|
if start_position > end_position
|
||||||
|
error("malformed argument")
|
||||||
|
end
|
||||||
|
start_position += 1 # omit brace
|
||||||
|
end
|
||||||
|
while start_position ≤ end_position && character_vector[end_position] == SPACE
|
||||||
|
end_position -= 1
|
||||||
|
end
|
||||||
|
while start_position ≤ end_position && character_vector[start_position] == SPACE
|
||||||
|
start_position += 1
|
||||||
|
end
|
||||||
|
SubString(astring, start_position, end_position)
|
||||||
end
|
end
|
||||||
argstart > argend && error("malformed argument")
|
|
||||||
argstart += 1 # omit brace
|
|
||||||
end
|
end
|
||||||
while argstart ≤ argend && p[argend] == SPACE
|
|
||||||
argend -= 1
|
|
||||||
end
|
|
||||||
while argstart ≤ argend && p[argstart] == SPACE
|
|
||||||
argstart += 1
|
|
||||||
end
|
|
||||||
return SubString(s, argstart, argend)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# to make replace work for LaTeX directives with our
|
# to make replace work for LaTeX directives with our
|
||||||
# custom search function, all we need to do is to define
|
# custom search function, all we need to do is to define
|
||||||
# a LaTeXDirectiveSearch type such that search(s, ::LaTeXDirectiveSearch, i)
|
# a LaTeXDirectiveSearch type such that search(s, ::LaTeXDirectiveSearch, index)
|
||||||
# returns the range of the directive
|
# returns the range of the directive
|
||||||
struct LaTeXDirectiveSearch; end
|
struct LaTeXDirectiveSearch; end
|
||||||
function Base.search(s::AbstractString, ::LaTeXDirectiveSearch, i::Integer)
|
function Base.search(s::AbstractString, ::LaTeXDirectiveSearch, index)
|
||||||
ds, de, ae = search_latexdirective(s, i)
|
start_position, directive_end, argument_end = search_latex_directive(s, index)
|
||||||
return ds < i ? (0:-1) : (ds:ae)
|
if start_position < index
|
||||||
|
0:-1
|
||||||
|
else
|
||||||
|
start_position:argument_end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
###########################################################################
|
###########################################################################
|
||||||
|
|
||||||
# Unicode substitutions for LaTeX directives
|
# Unicode substitutions for LaTeX directives
|
||||||
const latex_unicode = Dict{String,String}(
|
const latex_unicode = Dict(
|
||||||
# accent escapes like `\"u` for `ü`, from the list at
|
# accent escapes like `\"u` for `ü`, from the list at
|
||||||
# https://en.wikibooks.org/wiki/LaTeX/Special_Characters
|
# https://en.wikibooks.org/wiki/LaTeX/Special_Characters
|
||||||
# converted to LaTeX characters (mostly combining marks)
|
# converted to LaTeX characters (mostly combining marks)
|
||||||
|
@ -200,7 +227,7 @@ const latex_unicode = Dict{String,String}(
|
||||||
)
|
)
|
||||||
|
|
||||||
# LaTeX directives converted to Markdown
|
# LaTeX directives converted to Markdown
|
||||||
const md_directives = Dict{String,String}(
|
const markdown_directives = Dict(
|
||||||
"\\emph" => "_#1_",
|
"\\emph" => "_#1_",
|
||||||
"\\textit" => "_#1_",
|
"\\textit" => "_#1_",
|
||||||
"\\it" => "_#1_",
|
"\\it" => "_#1_",
|
||||||
|
@ -218,7 +245,7 @@ const md_directives = Dict{String,String}(
|
||||||
|
|
||||||
# directives that are stripped when converting
|
# directives that are stripped when converting
|
||||||
# to text/plain
|
# to text/plain
|
||||||
const text_directives = Dict{String,String}(
|
const text_directives = Dict(
|
||||||
"\\emph" => "#1",
|
"\\emph" => "#1",
|
||||||
"\\textit" => "#1",
|
"\\textit" => "#1",
|
||||||
"\\it" => "#1",
|
"\\it" => "#1",
|
||||||
|
@ -232,7 +259,7 @@ const text_directives = Dict{String,String}(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Unicode includes an incomplete set of super/subscript characters:
|
# Unicode includes an incomplete set of super/subscript characters:
|
||||||
const superscripts = Dict{Char,Char}(
|
const superscripts = Dict(
|
||||||
'0'=>'⁰', '1'=>'¹', '2'=>'²', '3'=>'³', '4'=>'⁴', '5'=>'⁵', '6'=>'⁶', '7'=>'⁷', '8'=>'⁸', '9'=>'⁹',
|
'0'=>'⁰', '1'=>'¹', '2'=>'²', '3'=>'³', '4'=>'⁴', '5'=>'⁵', '6'=>'⁶', '7'=>'⁷', '8'=>'⁸', '9'=>'⁹',
|
||||||
'a'=>'ᵃ', 'b'=>'ᵇ', 'c'=>'ᶜ', 'd'=>'ᵈ', 'e'=>'ᵉ', 'f'=>'ᶠ', 'g'=>'ᵍ', 'h'=>'ʰ',
|
'a'=>'ᵃ', 'b'=>'ᵇ', 'c'=>'ᶜ', 'd'=>'ᵈ', 'e'=>'ᵉ', 'f'=>'ᶠ', 'g'=>'ᵍ', 'h'=>'ʰ',
|
||||||
'i'=>'ⁱ', 'j'=>'ʲ', 'k'=>'ᵏ', 'l'=>'ˡ', 'm'=>'ᵐ', 'n'=>'ⁿ', 'o'=>'ᵒ', 'p'=>'ᵖ',
|
'i'=>'ⁱ', 'j'=>'ʲ', 'k'=>'ᵏ', 'l'=>'ˡ', 'm'=>'ᵐ', 'n'=>'ⁿ', 'o'=>'ᵒ', 'p'=>'ᵖ',
|
||||||
|
@ -242,83 +269,93 @@ const superscripts = Dict{Char,Char}(
|
||||||
'U'=>'ᵁ', 'V'=>'ⱽ', 'W'=>'ᵂ', 'β'=>'ᵝ', 'γ'=>'ᵞ', 'δ'=>'ᵟ', 'ψ'=>'ᵠ', 'χ'=>'ᵡ', 'Θ'=>'ᶿ',
|
'U'=>'ᵁ', 'V'=>'ⱽ', 'W'=>'ᵂ', 'β'=>'ᵝ', 'γ'=>'ᵞ', 'δ'=>'ᵟ', 'ψ'=>'ᵠ', 'χ'=>'ᵡ', 'Θ'=>'ᶿ',
|
||||||
'+'=>'⁺', '-'=>'⁻', '='=>'⁼', '('=>'⁽', ')'=>'⁾', ' '=>' ', '∘'=>'°',
|
'+'=>'⁺', '-'=>'⁻', '='=>'⁼', '('=>'⁽', ')'=>'⁾', ' '=>' ', '∘'=>'°',
|
||||||
)
|
)
|
||||||
const subscripts = Dict{Char,Char}(
|
const subscripts = Dict(
|
||||||
'0'=>'₀', '1'=>'₁', '2'=>'₂', '3'=>'₃', '4'=>'₄', '5'=>'₅', '6'=>'₆', '7'=>'₇', '8'=>'₈', '9'=>'₉',
|
'0'=>'₀', '1'=>'₁', '2'=>'₂', '3'=>'₃', '4'=>'₄', '5'=>'₅', '6'=>'₆', '7'=>'₇', '8'=>'₈', '9'=>'₉',
|
||||||
'a'=>'ₐ', 'e'=>'ₑ', 'h'=>'ₕ', 'i'=>'ᵢ', 'j'=>'ⱼ', 'k'=>'ₖ', 'l'=>'ₗ', 'm'=>'ₘ',
|
'a'=>'ₐ', 'e'=>'ₑ', 'h'=>'ₕ', 'i'=>'ᵢ', 'j'=>'ⱼ', 'k'=>'ₖ', 'l'=>'ₗ', 'm'=>'ₘ',
|
||||||
'n'=>'ₙ', 'o'=>'ₒ', 'p'=>'ₚ', 'r'=>'ᵣ', 's'=>'ₛ', 't'=>'ₜ', 'u'=>'ᵤ', 'v'=>'ᵥ', 'x'=>'ₓ',
|
'n'=>'ₙ', 'o'=>'ₒ', 'p'=>'ₚ', 'r'=>'ᵣ', 's'=>'ₛ', 't'=>'ₜ', 'u'=>'ᵤ', 'v'=>'ᵥ', 'x'=>'ₓ',
|
||||||
'β'=>'ᵦ', 'γ'=>'ᵧ', 'ρ'=>'ᵨ', 'ψ'=>'ᵩ', 'χ'=>'ᵪ',
|
'β'=>'ᵦ', 'γ'=>'ᵧ', 'ρ'=>'ᵨ', 'ψ'=>'ᵩ', 'χ'=>'ᵪ',
|
||||||
'-'=>'₋', '+'=>'₊', '='=>'₌', '('=>'₍', ')'=>'₎', ' '=>' ',
|
'-'=>'₋', '+'=>'₊', '='=>'₌', '('=>'₍', ')'=>'₎', ' '=>' ',
|
||||||
)
|
)
|
||||||
function replacechars(s::AbstractString, charmap::Associative{Char,Char})
|
|
||||||
buf = IOBuffer()
|
function replace_characters(astring, character_map)
|
||||||
for c in s
|
buffer = IOBuffer()
|
||||||
cm = get(charmap, c, '\0')
|
for character in astring
|
||||||
cm == '\0' && return ""
|
mapped_character = get(character_map, character, '\0')
|
||||||
print(buf, cm)
|
if mapped_character == '\0'
|
||||||
|
return ""
|
||||||
|
end
|
||||||
|
print(buffer, mapped_character)
|
||||||
end
|
end
|
||||||
return String(take!(buf))
|
String(take!(buffer))
|
||||||
end
|
end
|
||||||
|
|
||||||
# Given a (sub)string `s` that represents a LaTeX directive matched
|
# Given a (sub)string `astring` that represents a LaTeX directive matched
|
||||||
# by search_latexdirective, performs our Unicode substitutions and
|
# by search_latex_directive, performs our Unicode substitutions and
|
||||||
# also any additional substitutions given by extra_directives.
|
# also any additional substitutions given by extra_directives.
|
||||||
function directive_substitution(s::AbstractString, extra_directives::Associative{String,String})
|
function directive_substitution(astring, extra_directives)
|
||||||
ds, de = search_latexdirective(s)
|
start_position, directive_end, argument_end = search_latex_directive(astring)
|
||||||
ae = endof(s)
|
string_length = endof(astring)
|
||||||
directive = SubString(s, ds, de)
|
directive = SubString(astring, start_position, directive_end)
|
||||||
for dict in (extra_directives, latex_unicode, Base.REPLCompletions.latex_symbols)
|
for dict in (extra_directives, latex_unicode, Base.REPLCompletions.latex_symbols)
|
||||||
if haskey(dict, directive)
|
if haskey(dict, directive)
|
||||||
sub = dict[directive]
|
substitution = dict[directive]
|
||||||
if contains(sub, "#1")
|
if contains(substitution, "#1")
|
||||||
arg = striparg(replace_directives(striparg(s, de+1, ae), extra_directives))
|
argument = strip_argument(replace_directives(strip_argument(astring, directive_end + 1, string_length), extra_directives))
|
||||||
return replace(sub, "#1", arg)
|
return replace(substitution, "#1", argument)
|
||||||
else
|
else
|
||||||
arg = replace_directives(SubString(s, de+1, ae), extra_directives)
|
argument = replace_directives(SubString(astring, directive_end+1, string_length), extra_directives)
|
||||||
if strwidth(sub) == 0 # \hat{...} etc: combining chars go after argument
|
if strwidth(substitution) == 0 # \hat{...} etc: combining chars go after argument
|
||||||
return string(striparg(arg), sub)
|
return string(strip_argument(argument), substitution)
|
||||||
else
|
else
|
||||||
return string(sub, arg) # don't strip for 0-arg macros
|
return string(substitution, argument) # don't strip for 0-arg macros
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if directive == "^" || directive == "_" # super/subscripts
|
if directive == "^" || directive == "_" # super/subscripts
|
||||||
arg = striparg(replace_directives(striparg(s, de+1, ae), extra_directives))
|
argument = strip_argument(replace_directives(strip_argument(astring, directive_end + 1, string_length), extra_directives))
|
||||||
sarg = replacechars(arg, directive == "^" ? superscripts : subscripts)
|
dict = if directive == "^"
|
||||||
!isempty(sarg) && return sarg
|
superscripts
|
||||||
|
else
|
||||||
|
subscripts
|
||||||
|
end
|
||||||
|
substitution = replace_characters(argument, dict)
|
||||||
|
if !isempty(substitution)
|
||||||
|
return substitution
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return s # ignore unrecognized directives
|
astring # ignore unrecognized directives
|
||||||
end
|
end
|
||||||
|
|
||||||
# replace all latex directives in `s` via `directive_substitution`
|
# replace all LaTeX directives in `astring` via `directive_substitution`
|
||||||
replace_directives(s::AbstractString, extra_directives::Associative{String,String}) =
|
replace_directives(astring, extra_directives) =
|
||||||
replace(s, LaTeXDirectiveSearch(), sub -> directive_substitution(sub, extra_directives))
|
replace(astring, LaTeXDirectiveSearch(), substitution -> directive_substitution(substitution, extra_directives))
|
||||||
|
|
||||||
# strip unescaped $ signs from s
|
# strip unescaped $ signs from `astring`
|
||||||
function strip_dollars(s::Union{String,SubString{String}})
|
function strip_dollars(astring)
|
||||||
buf = IOBuffer()
|
buffer = IOBuffer()
|
||||||
p = Vector{UInt8}(s)
|
character_vector = Vector{UInt8}(astring)
|
||||||
for i = 1:sizeof(s)
|
for index = 1:sizeof(astring)
|
||||||
c = p[i]
|
character = character_vector[index]
|
||||||
if c == BACKSLASH && i < sizeof(s) && p[i+1] == DOLLAR
|
if character == BACKSLASH && index < sizeof(astring) && character_vector[index + 1] == DOLLAR
|
||||||
write(buf, DOLLAR) # \$ -> $
|
write(buffer, DOLLAR) # \$ -> $
|
||||||
elseif c != DOLLAR
|
elseif character != DOLLAR
|
||||||
write(buf, c)
|
write(buffer, character)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return String(take!(buf))
|
return String(take!(buffer))
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
simplify_latex(s::AbstractString, extra_directives=BibTeX.text_directives)
|
simplify_latex(astring, extra_directives = BibTeX.text_directives)
|
||||||
|
|
||||||
Simplify a LaTeX string `s` into "plain text" if possible, stripping/converting
|
Simplify a LaTeX string `astring` into "plain text" if possible, stripping/converting
|
||||||
known LaTeX directives in favor of e.g Unicode.
|
known LaTeX directives in favor of e.g. Unicode.
|
||||||
|
|
||||||
`extra_directives` is a dictionary (`String=>String`) that maps LaTeX directives
|
`extra_directives` is a dictionary (`String=>String`) that maps LaTeX directives
|
||||||
to replacements. It defaults to `BibTeX.text_directives`, which simply strips
|
to replacements. It defaults to `BibTeX.text_directives`, which simply strips
|
||||||
out things like bold and italics. Alternatively, you can pass `BibTeX.md_directives`,
|
out things like bold and italics. Alternatively, you can pass `BibTeX.markdown_directives`,
|
||||||
which uses Markdown syntax for such directives.
|
which uses Markdown syntax for such directives.
|
||||||
"""
|
"""
|
||||||
simplify_latex(s::AbstractString, extra_directives::Associative{String,String}=text_directives) =
|
simplify_latex(astring, extra_directives = text_directives) =
|
||||||
strip_dollars(replace_directives(s, extra_directives))
|
strip_dollars(replace_directives(astring, extra_directives))
|
||||||
|
|
|
@ -18,6 +18,8 @@ parse_text(text) = matchall(r"[^\s\"#{}@,=\\]+|\s+|\"|#|{|}|@|,|=|\\", text) |>
|
||||||
|
|
||||||
location(parser) = "on line $(parser.line)"
|
location(parser) = "on line $(parser.line)"
|
||||||
|
|
||||||
|
Base.isempty(p::Parser) = isempty(p.tokens)
|
||||||
|
|
||||||
next_token_default!(parser) =
|
next_token_default!(parser) =
|
||||||
if isempty(parser.tokens)
|
if isempty(parser.tokens)
|
||||||
one(parser)
|
one(parser)
|
||||||
|
@ -49,14 +51,15 @@ next_token!(parser, eol = "additional tokens") = begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
expect(parser, result, expectation) =
|
expect(parser, result, eol) =
|
||||||
if result != expectation
|
if result != eol
|
||||||
error("Expected $expectation $(location(parser))")
|
error("Expected $eol $(location(parser))")
|
||||||
end
|
end
|
||||||
|
|
||||||
expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
|
expect!(parser, eol) =
|
||||||
|
expect(parser, next_token!(parser, eol), eol)
|
||||||
|
|
||||||
token_and_counter!(parser, eol) = begin
|
token_and_counter!(parser, eol = "}") = begin
|
||||||
token = next_token_with_space!(parser, eol)
|
token = next_token_with_space!(parser, eol)
|
||||||
if token == "{"
|
if token == "{"
|
||||||
parser.bracket_counter += 1
|
parser.bracket_counter += 1
|
||||||
|
@ -80,10 +83,10 @@ value!(parser, values = eltype(parser)[]) = begin
|
||||||
end
|
end
|
||||||
elseif token == "{"
|
elseif token == "{"
|
||||||
parser.bracket_counter += 1
|
parser.bracket_counter += 1
|
||||||
token = token_and_counter!(parser, "}")
|
token = token_and_counter!(parser)
|
||||||
while parser.bracket_counter > 0
|
while parser.bracket_counter > 0
|
||||||
push!(values, token)
|
push!(values, token)
|
||||||
token = token_and_counter!(parser, "}")
|
token = token_and_counter!(parser)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
push!(values, getkey(parser.substitutions, token, String(token) ) )
|
push!(values, getkey(parser.substitutions, token, String(token) ) )
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
using BibTeX, Base.Test
|
using BibTeX, Base.Test
|
||||||
|
|
||||||
|
base_file = dirname(dirname(@__FILE__))
|
||||||
|
|
||||||
import Documenter
|
import Documenter
|
||||||
Documenter.makedocs(
|
Documenter.makedocs(
|
||||||
modules = [BibTeX],
|
modules = [BibTeX],
|
||||||
format = :html,
|
format = :html,
|
||||||
sitename = "BibTeX.jl",
|
sitename = "BibTeX.jl",
|
||||||
root = joinpath(dirname(dirname(@__FILE__)), "docs"),
|
root = joinpath(base_file, "docs"),
|
||||||
pages = Any["Home" => "index.md"],
|
pages = Any["Home" => "index.md"],
|
||||||
strict = true,
|
strict = true,
|
||||||
linkcheck = true,
|
linkcheck = true,
|
||||||
|
@ -14,7 +16,8 @@ Documenter.makedocs(
|
||||||
)
|
)
|
||||||
|
|
||||||
@testset "examples.bib" begin
|
@testset "examples.bib" begin
|
||||||
b = open(Bibliography, joinpath("..", "example", "examples.bib"), "r")
|
# note: a relative ".." path does not work on Windows, so start from base_file
|
||||||
|
b = open(Bibliography, joinpath(base_file, "example", "examples.bib"), "r")
|
||||||
@test length(b) == 92
|
@test length(b) == 92
|
||||||
@test (b["angenendt"]::Citation{:article})["date"] == "2002"
|
@test (b["angenendt"]::Citation{:article})["date"] == "2002"
|
||||||
end
|
end
|
||||||
|
@ -48,8 +51,8 @@ end
|
||||||
@test isempty(sizehint!(empty!(x2),10))
|
@test isempty(sizehint!(empty!(x2),10))
|
||||||
end
|
end
|
||||||
|
|
||||||
import BibTeX: simplify_latex, md_directives
|
import BibTeX: simplify_latex, markdown_directives
|
||||||
@testset "latex" begin
|
@testset "latex" begin
|
||||||
@test simplify_latex(raw"foo \$$x_1x_2^\mathrm{3}$ \dot{\alpha} {quote} \% \{unquote\} \emph{bar \textbf{bold}} {\bf baz 2.0} {\^{u}}", md_directives) ==
|
@test simplify_latex(raw"foo \$$x_1x_2^\mathrm{3}$ \dot{\alpha} {quote} \% \{unquote\} \emph{bar \textbf{bold}} {\bf baz 2.0} {\^{u}}", markdown_directives) ==
|
||||||
"foo \$x₁x₂³ α̇ quote % {unquote} _bar **bold**_ **baz 2.0** û"
|
"foo \$x₁x₂³ α̇ quote % {unquote} _bar **bold**_ **baz 2.0** û"
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue