initial attempt at latex substitutions
This commit is contained in:
parent
251f16ce9f
commit
9c8c04e950
|
@ -4,5 +4,6 @@ export Bibliography, Citation
|
||||||
include("parser.jl")
|
include("parser.jl")
|
||||||
include("citation.jl")
|
include("citation.jl")
|
||||||
include("bibliography.jl")
|
include("bibliography.jl")
|
||||||
|
include("latex.jl")
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
# conversion of LaTeX directives to plain text, markdown, etc.
|
||||||
|
#
|
||||||
|
# The basic idea is that we search for `\foo{arg}`, `{\foo arg}`,
|
||||||
|
# or `{\foo{arg}}`, and look up `foo` in a dictionary of substitutions
|
||||||
|
# like `textit` -> `_#1_` where #1 is where the (first) argument is
|
||||||
|
# substituted. Then we have separate dictionary entries for text/plain,
|
||||||
|
# text/markdown, etcetera.
|
||||||
|
|
||||||
|
# regex matching (directive,arg)
|
||||||
|
# The pattern has three alternatives, tried left to right. In each one the
# directive name is either a single non-word character (e.g. `"` or `^`)
# or a run of ASCII letters:
#   1. `\foo{arg}`                   -> capture 1 (name), capture 2 (arg)
#   2. `{\foo arg}` or `{\foo{arg}}` -> capture 3 (name), capture 4 (arg as
#      written, inner braces included), capture 5 (arg inside the inner
#      braces; unset when there are none)
#   3. `\foo arg` without braces     -> capture 6 (name), capture 7 (arg: a
#      possibly empty run of word characters)
# Brace-delimited arguments are matched with `[^}]*`, so an argument that
# itself contains a `}` (nested braces) is not matched as a whole.
const latex_directive = r"\\(\W|[A-Za-z]+) *\{([^}]*)\}|\{ *\\(\W|[A-Za-z]+) *(\{([^}]*)\}|[^}]*)\}|\\(\W|[A-Za-z]+) *(\w*)"
|
||||||
|
|
||||||
|
# Pull the `(directive, arg)` pair out of a match `m` of `latex_directive`.
#
# Which capture groups are set tells us which of the three syntaxes matched:
# groups 1-2 for `\foo{arg}`, groups 3-5 for `{\foo arg}` / `{\foo{arg}}`,
# and groups 6-7 for a bare `\foo arg`.  Throws an `ArgumentError` if none
# of the directive-name groups (1, 3, 6) is set.
function extract_directive(m::RegexMatch)
    caps = m.captures
    if caps[1] !== nothing
        # `\foo{arg}`
        return (caps[1], caps[2])
    elseif caps[3] !== nothing
        # `{\foo ...}`: prefer the text inside inner braces when present,
        # otherwise use the unbraced argument as written
        inner = caps[5]
        return (caps[3], inner === nothing ? caps[4] : inner)
    elseif caps[6] !== nothing
        # bare `\foo arg`
        return (caps[6], caps[7])
    end
    throw(ArgumentError("unknown latex_directive match"))
end
|
||||||
|
|
||||||
|
# Unicode substitutions for LaTeX accent and special-letter directives.
#
# Keys are directive names without the leading backslash (so `\"u` is
# looked up under `"`); values are templates in which `#1` marks where the
# argument is substituted.  The accent escapes are taken from the list at
#     https://en.wikibooks.org/wiki/LaTeX/Special_Characters
# and converted to Unicode characters (mostly combining marks, which is
# why those templates put the mark after `#1`).
const latex_unicode = Dict{String,String}(
    # combining marks: appended after the argument
    "`"  => "#1\u0300", # grave
    "'"  => "#1\u0301", # acute
    "^"  => "#1\u0302", # circumflex
    "~"  => "#1\u0303", # tilde
    "="  => "#1\u0304", # macron
    "u"  => "#1\u0306", # breve
    "."  => "#1\u0307", # dot above
    "\"" => "#1\u0308", # diaeresis
    "r"  => "#1\u030a", # ring above
    "H"  => "#1\u030b", # double acute
    "v"  => "#1\u030c", # caron
    "d"  => "#1\u0323", # dot below
    "c"  => "#1\u0327", # cedilla
    "k"  => "#1\u0328", # ogonek
    "b"  => "#1\u0331", # macron below
    "t"  => "#1\u0361", # fixme: u+0361 should go after first char in #1

    # standalone letters: emitted before whatever argument follows
    "l"  => "\u0142#1", # l with stroke
    "o"  => "\u00f8#1", # o with stroke
    "i"  => "\u0131#1", # dotless i
    "j"  => "\u0237#1", # dotless j

    # many other substitutions can be found in
    # Base.REPLCompletions.latex_symbols
)
|
||||||
|
|
||||||
|
# Markdown renderings of LaTeX formatting directives.
#
# Keys are directive names without the leading backslash; values are
# templates in which every `#1` is replaced by the directive's argument.
const md_directives = Dict{String,String}(
    # emphasis / italics
    "emph"   => "_#1_",
    "textit" => "_#1_",
    "it"     => "_#1_",

    # boldface
    "textbf" => "**#1**",
    "bf"     => "**#1**",

    # monospace
    "texttt" => "`#1`",

    # links: the URL serves as both link text and target
    "url"    => "[#1](#1)",

    # strike-through
    "sout"   => "~~#1~~",
    "st"     => "~~#1~~",
    "cancel" => "~~#1~~",
)
|
||||||
|
|
||||||
|
# Formatting directives that are stripped when converting to text/plain:
# each one maps to the bare template `#1`, i.e. the directive is replaced
# by its argument with no decoration.
const text_directives = Dict{String,String}(
    name => "#1" for name in (
        "emph", "textit", "it",   # emphasis / italics
        "textbf", "bf",           # boldface
        "texttt",                 # monospace
        "url",                    # links
        "sout", "st", "cancel",   # strike-through
    )
)
|
||||||
|
|
||||||
|
# Given a string `s` that matches the `latex_directive` regex, return the
# string to replace it with.
#
# The directive's argument is first processed recursively with
# `replace_directives`, then the directive name is looked up, in order, in
#   1. `extra_directives` (format-specific templates, e.g. `md_directives`),
#   2. `latex_unicode` (accent / special-letter templates),
#   3. `Base.REPLCompletions.latex_symbols` (looked up as `\name`).
# If `s` does not match the regex, or the directive is found in none of the
# tables, `s` is returned unchanged.
#
# An accent whose argument is not wrapped in braces applies to a single
# character only: `M\"uller` means "Müller", yet the regex captures the
# whole word `uller` as the argument.  In that case the accent (or the
# combining symbol from `latex_symbols`) is attached to the first character
# of the argument and the remainder is appended untouched.  Brace-delimited
# arguments (`\"{u}`, `{\^{u}}`) and `extra_directives` templates still
# receive the whole argument.
function directive_substitution(s::AbstractString, extra_directives::Associative{String,String})
    m = match(latex_directive, s)
    m === nothing && return s
    directive, arg_ = extract_directive(m)
    arg = replace_directives(arg_, extra_directives) # recursively replace in args

    if haskey(extra_directives, directive)
        return replace(extra_directives[directive], "#1", arg)
    end

    # Captures 2 and 5 of `latex_directive` are the brace-delimited
    # arguments; capture 2 is set exactly when capture 1 is.
    braced = m.captures[1] !== nothing || m.captures[5] !== nothing
    if braced || isempty(arg)
        target, rest = arg, ""
    else
        target, rest = string(first(arg)), arg[nextind(arg, 1):end]
    end

    if haskey(latex_unicode, directive)
        return string(replace(latex_unicode[directive], "#1", target), rest)
    end

    bdir = string('\\', directive)
    if haskey(Base.REPLCompletions.latex_symbols, bdir)
        sym = Base.REPLCompletions.latex_symbols[bdir]
        if isempty(sym) || strwidth(sym) > 0
            return string(sym, arg)
        else
            # zero-width symbol: a combining character like \hat{x}, which
            # must follow the character it modifies
            return string(target, sym, rest)
        end
    end

    return s # no substitutions found
end
|
||||||
|
|
||||||
|
# Replace every LaTeX directive found in `s` (every match of
# `latex_directive`) with the result of `directive_substitution`, using
# `extra_directives` as the format-specific substitution table.
function replace_directives(s::AbstractString, extra_directives::Associative{String,String})
    substitute(found) = directive_substitution(found, extra_directives)
    return replace(s, latex_directive, substitute)
end
|
|
@ -47,3 +47,9 @@ end
|
||||||
@test isempty(similar(x2))
|
@test isempty(similar(x2))
|
||||||
@test isempty(sizehint!(empty!(x2),10))
|
@test isempty(sizehint!(empty!(x2),10))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
import BibTeX: replace_directives, md_directives
|
||||||
|
@testset "latex" begin
    # one directive of each syntax: `\foo{arg}`, `{\foo arg}`, `{\foo{arg}}`
    input = raw"foo \emph{bar} {\bf baz} {\^{u}}"
    expected = "foo _bar_ **baz** û"
    @test replace_directives(input, md_directives) == expected
end
|
||||||
|
|
Loading…
Reference in New Issue