Compare commits
27 Commits
Author | SHA1 | Date |
---|---|---|
Brandon Taylor | 673e30ce1f | |
bramtayl | 3f6dfb22d1 | |
Steven G. Johnson | 93ecaf5ccf | |
Steven G. Johnson | 26bfe8d705 | |
Steven G. Johnson | ab60bb59d8 | |
Brandon Taylor | 54ecf83c40 | |
Brandon Taylor | 1a8fad9cb5 | |
Brandon Taylor | 54da038df9 | |
Brandon Taylor | 88c6e10e83 | |
Brandon Taylor | 75d1b6d74c | |
Steven G. Johnson | 9c8c04e950 | |
bramtayl | 251f16ce9f | |
Steven G. Johnson | e6c0702811 | |
Steven G. Johnson | 457f4104d4 | |
Steven G. Johnson | 75809edb6b | |
Steven G. Johnson | 85e5456187 | |
bramtayl | b16776190b | |
Brandon Taylor | 6e1e18e89b | |
Brandon Taylor | 1c31cd6795 | |
Brandon Taylor | 352997ae86 | |
Brandon Taylor | 19b3a8804e | |
Brandon Taylor | 687aea8d99 | |
Brandon Taylor | d4d33933d4 | |
Brandon Taylor | 2054013b30 | |
Brandon Taylor | 233386140a | |
Brandon Taylor | 10d0eebc31 | |
Brandon Taylor | e8e0983528 |
|
@ -0,0 +1,3 @@
|
|||
*.jl.cov
|
||||
*.jl.*.cov
|
||||
*.jl.mem
|
|
@ -0,0 +1,14 @@
|
|||
# Documentation: http://docs.travis-ci.com/user/languages/julia/
|
||||
language: julia
|
||||
os:
|
||||
- linux
|
||||
julia:
|
||||
- 0.6
|
||||
- nightly
|
||||
notifications:
|
||||
email: false
|
||||
after_success:
|
||||
# build documentation
|
||||
- julia -e 'cd(Pkg.dir("BibTeX")); Pkg.add("Documenter"); include(joinpath("docs", "make.jl"))'
|
||||
# push coverage results to Codecov
|
||||
- julia -e 'cd(Pkg.dir("BibTeX")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
|
|
@ -0,0 +1,41 @@
|
|||
The BibTeX.jl package is licensed under the MIT "Expat" License:
|
||||
|
||||
|
||||
> Copyright (c) 2017: Brandon Taylor.
|
||||
>
|
||||
>
|
||||
> Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
>
|
||||
> of this software and associated documentation files (the "Software"), to deal
|
||||
>
|
||||
> in the Software without restriction, including without limitation the rights
|
||||
>
|
||||
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
>
|
||||
> copies of the Software, and to permit persons to whom the Software is
|
||||
>
|
||||
> furnished to do so, subject to the following conditions:
|
||||
>
|
||||
>
|
||||
>
|
||||
> The above copyright notice and this permission notice shall be included in all
|
||||
>
|
||||
> copies or substantial portions of the Software.
|
||||
>
|
||||
>
|
||||
>
|
||||
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
>
|
||||
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
>
|
||||
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
>
|
||||
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
>
|
||||
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
>
|
||||
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
>
|
||||
> SOFTWARE.
|
||||
>
|
||||
>
|
|
@ -0,0 +1,20 @@
|
|||
# BibTeX
|
||||
|
||||
[![travis badge][travis_badge]][travis_url]
|
||||
[![codecov badge][codecov_badge]][codecov_url]
|
||||
|
||||
## Documentation [here][documenter_latest]
|
||||
|
||||
Change documentation link to `documenter_stable` once published!
|
||||
|
||||
[travis_badge]: https://travis-ci.org/bramtayl/BibTeX.jl.svg?branch=master
|
||||
[travis_url]: https://travis-ci.org/bramtayl/BibTeX.jl
|
||||
|
||||
[appveyor_badge]: https://ci.appveyor.com/api/projects/status/github/bramtayl/BibTeX.jl?svg=true&branch=master
|
||||
[appveyor_url]: https://ci.appveyor.com/project/bramtayl/bibtex-jl
|
||||
|
||||
[codecov_badge]: http://codecov.io/github/bramtayl/BibTeX.jl/coverage.svg?branch=master
|
||||
[codecov_url]: http://codecov.io/github/bramtayl/BibTeX.jl?branch=master
|
||||
|
||||
[documenter_stable]: https://bramtayl.github.io/BibTeX.jl/stable
|
||||
[documenter_latest]: https://bramtayl.github.io/BibTeX.jl/latest
|
|
@ -0,0 +1,26 @@
|
|||
environment:
|
||||
matrix:
|
||||
- JULIAVERSION: "julialang/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
|
||||
- JULIAVERSION: "julialang/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
|
||||
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
|
||||
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /release-.*/
|
||||
notifications:
|
||||
- provider: Email
|
||||
on_build_success: false
|
||||
on_build_failure: false
|
||||
on_build_status_changed: false
|
||||
install:
|
||||
- ps: (new-object net.webclient).DownloadFile(
|
||||
$("http://s3.amazonaws.com/"+$env:JULIAVERSION),
|
||||
"C:\projects\julia-binary.exe")
|
||||
- C:\projects\julia-binary.exe /S /D=C:\projects\julia
|
||||
build_script:
|
||||
- IF EXIST .git\shallow (git fetch --unshallow)
|
||||
- C:\projects\julia\bin\julia -e "versioninfo();
|
||||
Pkg.clone(pwd(), \"BibTeX\"); Pkg.build(\"BibTeX\")"
|
||||
test_script:
|
||||
- C:\projects\julia\bin\julia -e "Pkg.test(\"BibTeX\")"
|
|
@ -0,0 +1,2 @@
|
|||
build/
|
||||
site/
|
|
@ -0,0 +1,8 @@
|
|||
import Documenter

# Publish the already-built documentation (the `build` directory); no extra
# dependencies are installed and no extra build step is run at deploy time.
Documenter.deploydocs(
    repo   = "github.com/bramtayl/BibTeX.jl.git",
    target = "build",
    deps   = nothing,
    make   = nothing,
)
|
|
@ -0,0 +1,8 @@
|
|||
# BibTeX.jl
|
||||
|
||||
```@index
|
||||
```
|
||||
|
||||
```@autodocs
|
||||
Modules = [BibTeX]
|
||||
```
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,9 @@
|
|||
# BibTeX.jl: parsing of BibTeX databases plus simplification of LaTeX markup.
module BibTeX

export Bibliography, Citation

# Load order matters: bibliography.jl refers to `Citation` (citation.jl) in the
# supertype and field declarations of `Bibliography`.
for source_file in ("parser.jl", "citation.jl", "bibliography.jl", "latex.jl")
    include(source_file)
end

end
|
|
@ -0,0 +1,39 @@
|
|||
# A parsed BibTeX database. Subtyping `Associative` makes it usable as a
# dictionary from citation id (`String`) to `Citation`.
struct Bibliography <: Associative{String,Citation}
    preamble::String              # preamble text returned by the parser
    data::Dict{String,Citation}   # citation id => entry
end
|
||||
|
||||
"""
    Bibliography(bibtex::String)
    Bibliography(io::IO)

Parse bibtex-format bibliography data, supplied either as a string or as an
IO stream, and return a `b::Bibliography`: a `Dict`-like object that maps
strings (the citation keys) to bibliography items of type
[`Citation`](@ref).
"""
function Bibliography(bibtex::String)
    preamble, records = parse_bibtex(bibtex)
    citations = Dict{String,Citation}()
    for (key, fields) in records
        # `Citation!` removes the "__type__" entry from `fields`
        citations[key] = Citation!(fields)
    end
    return Bibliography(preamble, citations)
end
|
||||
# Read everything from `io` and parse it as bibtex data.
function Bibliography(io::IO)
    return Bibliography(readstring(io))
end

# `open(Bibliography, args...)`: open a stream with `args`, parse its contents
# into a `Bibliography`, and let `open` close the stream afterwards.
function Base.open(::Type{Bibliography}, args...)
    return open(args...) do io
        Bibliography(io)
    end
end
|
||||
|
||||
# --- container maintenance: the mutating methods return `b` itself ---

function Base.similar(b::Bibliography)
    return Bibliography("", Dict{String,Citation}())
end

function Base.rehash!(b::Bibliography, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Bibliography, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Bibliography)
    empty!(b.data)
    return b
end

function Base.copy(b::Bibliography)
    return Bibliography(b.preamble, copy(b.data))
end

# --- element access: keys are normalized to `String` before the lookup ---

Base.setindex!(b::Bibliography, v::Citation, k::AbstractString) =
    (b.data[String(k)] = v; b)

function Base.get(b::Bibliography, k::AbstractString, default)
    return get(b.data, String(k), default)
end

# --- iteration simply forwards to the wrapped dictionary ---

Base.start(b::Bibliography)   = start(b.data)
Base.done(b::Bibliography, i) = done(b.data, i)
Base.next(b::Bibliography, i) = next(b.data, i)
Base.length(b::Bibliography)  = length(b.data)
|
||||
|
||||
# todo: add specialized Base.show methods for MIME"text/bibtex" etc.
|
|
@ -0,0 +1,42 @@
|
|||
"""
    Citation{S}(data::Dict{String,String})

One item of a bibTeX database, stored as a dictionary from field names to
field values.  The type parameter `S` is a symbol naming the kind of item
(`:article` etcetera).  A `b::Citation` can be indexed as `b[key]` to
retrieve a field, and in general behaves like a dictionary from `String`
to `String`.
"""
struct Citation{S} <: Associative{String,String}
    data::Dict{String,String}   # field name => field value
end
|
||||
# An item of type `S` without any fields.
function Citation{S}() where {S}
    return Citation{S}(Dict{String,String}())
end

# Build a `Citation` from a raw field dictionary.  The "__type__" entry is
# removed from `data` (hence the `!`) and becomes the type parameter; the
# remaining dictionary is wrapped, not copied.
function Citation!(data::Dict{String,String})
    entry_type = pop!(data, "__type__")
    return Citation{Symbol(entry_type)}(data)
end
|
||||
|
||||
# --- container maintenance: the mutating methods return `b` itself ---

function Base.similar(b::Citation{S}) where {S}
    return Citation{S}(Dict{String,String}())
end

function Base.rehash!(b::Citation, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Citation, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Citation)
    empty!(b.data)
    return b
end

function Base.copy(b::Citation{S}) where {S}
    return Citation{S}(copy(b.data))
end

# --- element access: keys and values are normalized to `String` ---

function Base.get(b::Citation, k::AbstractString, default)
    return get(b.data, String(k), default)
end

function Base.getindex(b::Citation, k::AbstractString)
    return getindex(b.data, String(k))
end

Base.setindex!(b::Citation, v::AbstractString, k::AbstractString) =
    (b.data[String(k)] = String(v); b)

# --- iteration simply forwards to the wrapped dictionary ---

Base.start(b::Citation)   = start(b.data)
Base.done(b::Citation, i) = done(b.data, i)
Base.next(b::Citation, i) = next(b.data, i)
Base.length(b::Citation)  = length(b.data)
|
||||
|
||||
# Compact one-line display: the item type and the number of stored fields,
# e.g. `Citation{:article}(1 entries)`.
# Uses `where {S}` like every other parametric method in this file; the old
# `f{S}(...)` method syntax is deprecated.
function Base.show(io::IO, b::Citation{S}) where {S}
    print(io, "Citation{:$S}(", length(b), " entries)")
end
|
||||
|
||||
# TODO: add Base.show text/plain and text/markdown for formatted citation
|
|
@ -0,0 +1,361 @@
|
|||
# conversion of LaTeX directives to plain text, markdown, etc.
|
||||
#
|
||||
# The basic idea is that we search for `\foo{argument}`, `{\foo argument}`,
|
||||
# or `{\foo{argument}}`, and look up `foo` in a dictionary of substitutions
|
||||
# like `\textit` -> `*#1*` where #1 is where the (first) argument is
|
||||
# substituted. Then we have separate dictionary entries for text/plain,
|
||||
# text/markdown, etcetera.
|
||||
|
||||
###########################################################################
|
||||
# parsing LaTeX directives:
|
||||
|
||||
# Byte values of the ASCII characters that matter when scanning LaTeX.
const BACKSLASH = UInt8('\\')
const BRACE_OPEN = UInt8('{')
const BRACE_CLOSE = UInt8('}')
const SPACE = UInt8(' ')
const DOLLAR = UInt8('$')
const CARET = UInt8('^')
const UNDERSCORE = UInt8('_')

# ASCII-only classification of a single byte
is_letter(x::UInt8) = UInt8('a') ≤ x ≤ UInt8('z') || UInt8('A') ≤ x ≤ UInt8('Z')
is_alphanumeric(x::UInt8) = UInt8('0') ≤ x ≤ UInt8('9') || is_letter(x)

"""
    search_latex_directive(astring, start_position = 1)

Search `astring`, starting at byte index `start_position`, for a LaTeX directive
`\\directive{argument}` or similar (including `^`, `_` and bare `{...}` groups).
Returns `(directive_start, directive_end, argument_end)` such that
`astring[directive_start:directive_end]` gives `\\directive` and
`astring[directive_end+1:argument_end]` gives `{argument}`.  For a bare `{...}`
group the directive is empty (`directive_end == directive_start - 1`).
Returns `(0, 0, 0)` if nothing is found.  Use [`strip_argument`](@ref) to remove
surrounding braces and whitespace from the `argument`.
"""
function search_latex_directive(astring, start_position = 1)
    string_length = sizeof(astring)
    if !(0 < start_position ≤ string_length)
        return 0, 0, 0
    end
    character_vector = Vector{UInt8}(astring)
    index = start_position
    all_spaces = true   # true while only spaces precede the directive

    # find \foo directive or {...}:
    character = UInt8(0)
    while index ≤ string_length
        character = character_vector[index]
        if (character == BACKSLASH || character == BRACE_OPEN || character == CARET || character == UNDERSCORE)
            break
        end
        if character != SPACE
            all_spaces = false
        end
        index += 1
    end

    if index ≤ string_length && character != BRACE_OPEN
        directive_start = index
        if character == BACKSLASH
            index += 2
            if index - 1 > string_length
                # backslash is the last byte: nothing to match
                return 0, 0, 0
            end
            # `\` followed by letters is a named directive; otherwise it is
            # a one-character escape such as `\%` or `\"`
            if is_letter(character_vector[index - 1])
                while index ≤ string_length && is_letter(character_vector[index])
                    index += 1
                end
            end
            directive_end = index - 1
        else
            directive_end = directive_start # ^ or _
            index += 1
        end

        # look for optional opening brace
        while index ≤ string_length && character_vector[index] == SPACE
            index += 1
        end
        if index > string_length
            return directive_start, directive_end, string_length
        end
        in_braces = character_vector[index] == BRACE_OPEN
        if !in_braces
            # search backwards from \foo to look for { \foo ...}
            backwards_index = directive_start - 1
            while backwards_index ≥ start_position && character_vector[backwards_index] == SPACE
                backwards_index -= 1
            end
            if backwards_index < start_position || character_vector[backwards_index] != BRACE_OPEN
                if character_vector[index] == BACKSLASH
                    # argument is another latex directive
                    inner_start_position, inner_directive_end, inner_argument_end = search_latex_directive(astring, index)
                    if inner_argument_end == 0
                        # The inner search fails when the backslash is the
                        # last byte of the string.  Report the directive
                        # without an argument rather than an `argument_end`
                        # of 0, which would give callers an empty match.
                        return directive_start, directive_end, directive_end
                    end
                    return directive_start, directive_end, inner_argument_end
                elseif character != BACKSLASH
                    # in an equation, token is a single char
                    return directive_start, directive_end, index
                elseif all_spaces
                    # if `\directive ...` was preceded only
                    # by whitespace, then assume arguments
                    # extend to the end of the string.  This
                    # happens when we recurse on `{\directive ...}`.
                    return directive_start, directive_end, string_length
                else
                    # argument is not in braces … get next token
                    while index ≤ string_length && is_alphanumeric(character_vector[index])
                        index += 1
                    end
                    return directive_start, directive_end, index - 1
                end
            end
        end
        index += 1
    elseif index > string_length
        return 0, 0, 0
    else # { ... }
        directive_start = index
        directive_end = index - 1
        in_braces = true
        index += 1
    end

    # search for end of argument (closing brace)
    number_of_braces = 1
    while index ≤ string_length
        character = character_vector[index]
        if character == BRACE_OPEN
            number_of_braces += 1
        elseif character == BRACE_CLOSE
            number_of_braces -= 1
            if number_of_braces == 0
                # include the closing brace only if the argument itself
                # started with an opening brace
                argument_end = in_braces ? index : index - 1
                return directive_start, directive_end, argument_end
            end
        end
        index += 1
    end
    # unbalanced braces: the argument runs to the end of the string
    return directive_start, directive_end, string_length
end
|
||||
|
||||
"""
    strip_argument(astring, start_position = start(astring), end_position = endof(astring))

Return the part of `astring` between `start_position` and `end_position` that
makes up the argument, as a `SubString`, with enclosing braces and
leading/trailing spaces removed.
"""
function strip_argument(astring, start_position = start(astring), end_position = endof(astring))
    # empty range: return an empty substring
    start_position > end_position && return SubString(astring, 1, 0)

    last_index = endof(astring)
    in_bounds = 1 ≤ start_position ≤ last_index && 1 ≤ end_position ≤ last_index
    in_bounds || throw(BoundsError())

    bytes = Vector{UInt8}(astring)
    if bytes[end_position] == BRACE_CLOSE
        end_position -= 1 # omit brace
        # advance to the opening brace
        while start_position ≤ end_position && bytes[start_position] != BRACE_OPEN
            start_position += 1
        end
        start_position > end_position && error("malformed argument")
        start_position += 1 # omit brace
    end

    # trim spaces, first on the right and then on the left
    while start_position ≤ end_position && bytes[end_position] == SPACE
        end_position -= 1
    end
    while start_position ≤ end_position && bytes[start_position] == SPACE
        start_position += 1
    end
    return SubString(astring, start_position, end_position)
end
|
||||
|
||||
# `replace` locates matches through `search(s, pattern, index)`.  Defining a
# pattern type whose `search` method delegates to `search_latex_directive`
# is therefore enough to make `replace` work on LaTeX directives: the method
# returns the range covering the directive and its argument.
struct LaTeXDirectiveSearch end

function Base.search(s::AbstractString, ::LaTeXDirectiveSearch, index)
    start_position, directive_end, argument_end = search_latex_directive(s, index)
    # a start before `index` (in particular 0) means "no match"
    return start_position < index ? (0:-1) : (start_position:argument_end)
end
|
||||
###########################################################################
|
||||
|
||||
# Unicode substitutions for LaTeX directives.  `#1` marks where the
# directive's argument is inserted.
const latex_unicode = Dict{String,String}(
    # accent escapes like `\"u` for `ü`, from the list at
    # https://en.wikibooks.org/wiki/LaTeX/Special_Characters
    # converted to Unicode characters (mostly combining marks)
    "\\`"  => "#1\u0300",
    "\\'"  => "#1\u0301",
    "\\^"  => "#1\u0302",
    "\\~"  => "#1\u0303",
    "\\="  => "#1\u0304",
    "\\u"  => "#1\u0306",
    "\\."  => "#1\u0307",
    "\\\"" => "#1\u0308",
    "\\r"  => "#1\u030a",
    "\\H"  => "#1\u030b",
    "\\v"  => "#1\u030c",
    "\\d"  => "#1\u0323",
    "\\c"  => "#1\u0327",
    "\\k"  => "#1\u0328",
    "\\b"  => "#1\u0331",
    "\\t"  => "#1\u0361", # fixme: u+0361 should go after first char in #1

    # special letters that take no argument
    "\\l" => "\u0142",
    "\\o" => "\u00f8",
    "\\i" => "\u0131",
    "\\j" => "\u0237",

    # other backslash escapes
    "\\\\" => "\\",
    "\\{"  => "{",
    "\\}"  => "}",
    "\\%"  => "%",
    # "\\\$" => "\$" -- dollar signs will be unescaped in strip_dollars

    # We parse {....} quoting as an empty directive:
    "" => "#1",

    # many other substitutions can be found in
    # Base.REPLCompletions.latex_symbols
)
|
||||
|
||||
# Markdown equivalents of LaTeX formatting directives (`#1` is the argument)
const markdown_directives = Dict{String,String}(
    # emphasis / italics
    "\\emph" => "_#1_", "\\textit" => "_#1_", "\\it" => "_#1_", "\\mathit" => "_#1_",
    # bold
    "\\textbf" => "**#1**", "\\bf" => "**#1**", "\\mathbf" => "**#1**",
    # monospace, roman, links
    "\\texttt" => "`#1`",
    "\\mathrm" => "#1",
    "\\url" => "[#1](#1)",
    # strike-through
    "\\sout" => "~~#1~~", "\\st" => "~~#1~~", "\\cancel" => "~~#1~~",
)
|
||||
|
||||
# Formatting directives that are dropped (only their argument `#1` is kept)
# when converting to text/plain
const text_directives = Dict{String,String}(
    "\\emph" => "#1", "\\textit" => "#1", "\\it" => "#1", "\\mathit" => "#1",
    "\\textbf" => "#1", "\\bf" => "#1", "\\mathbf" => "#1",
    "\\texttt" => "#1", "\\mathrm" => "#1", "\\url" => "#1",
)
|
||||
|
||||
# Unicode includes an incomplete set of super/subscript characters.
# A few capitals ('C', 'S', 'Θ') have no superscript form and are mapped to
# the superscript of the corresponding small letter.
const superscripts = Dict(
    '0'=>'⁰', '1'=>'¹', '2'=>'²', '3'=>'³', '4'=>'⁴', '5'=>'⁵', '6'=>'⁶', '7'=>'⁷', '8'=>'⁸', '9'=>'⁹',
    'a'=>'ᵃ', 'b'=>'ᵇ', 'c'=>'ᶜ', 'd'=>'ᵈ', 'e'=>'ᵉ', 'f'=>'ᶠ', 'g'=>'ᵍ', 'h'=>'ʰ',
    'i'=>'ⁱ', 'j'=>'ʲ', 'k'=>'ᵏ', 'l'=>'ˡ', 'm'=>'ᵐ', 'n'=>'ⁿ', 'o'=>'ᵒ', 'p'=>'ᵖ',
    'r'=>'ʳ', 's'=>'ˢ', 't'=>'ᵗ', 'u'=>'ᵘ', 'v'=>'ᵛ', 'w'=>'ʷ', 'x'=>'ˣ', 'y'=>'ʸ', 'z'=>'ᶻ',
    'A'=>'ᴬ', 'B'=>'ᴮ', 'C'=>'ᶜ', 'D'=>'ᴰ', 'E'=>'ᴱ', 'G'=>'ᴳ', 'H'=>'ᴴ', 'I'=>'ᴵ', 'J'=>'ᴶ',
    'K'=>'ᴷ', 'L'=>'ᴸ', 'M'=>'ᴹ', 'N'=>'ᴺ', 'O'=>'ᴼ', 'P'=>'ᴾ', 'R'=>'ᴿ', 'S'=>'ˢ', 'T'=>'ᵀ',
    'U'=>'ᵁ', 'V'=>'ⱽ', 'W'=>'ᵂ', 'β'=>'ᵝ', 'γ'=>'ᵞ', 'δ'=>'ᵟ', 'χ'=>'ᵡ', 'Θ'=>'ᶿ', 'θ'=>'ᶿ',
    # U+1D60 is a superscript *phi* (there is no superscript psi), so it is
    # keyed by both phi glyphs (U+03C6 and U+03D5) rather than by 'ψ'
    'φ'=>'ᵠ', 'ϕ'=>'ᵠ',
    '+'=>'⁺', '-'=>'⁻', '='=>'⁼', '('=>'⁽', ')'=>'⁾', ' '=>' ', '∘'=>'°',
)
|
||||
# Characters that have a Unicode subscript form
const subscripts = Dict(
    '0'=>'₀', '1'=>'₁', '2'=>'₂', '3'=>'₃', '4'=>'₄', '5'=>'₅', '6'=>'₆', '7'=>'₇', '8'=>'₈', '9'=>'₉',
    'a'=>'ₐ', 'e'=>'ₑ', 'h'=>'ₕ', 'i'=>'ᵢ', 'j'=>'ⱼ', 'k'=>'ₖ', 'l'=>'ₗ', 'm'=>'ₘ',
    'n'=>'ₙ', 'o'=>'ₒ', 'p'=>'ₚ', 'r'=>'ᵣ', 's'=>'ₛ', 't'=>'ₜ', 'u'=>'ᵤ', 'v'=>'ᵥ', 'x'=>'ₓ',
    'β'=>'ᵦ', 'γ'=>'ᵧ', 'ρ'=>'ᵨ', 'χ'=>'ᵪ',
    # U+1D69 is a subscript *phi* (there is no subscript psi), so it is keyed
    # by both phi glyphs (U+03C6 and U+03D5) rather than by 'ψ'
    'φ'=>'ᵩ', 'ϕ'=>'ᵩ',
    '-'=>'₋', '+'=>'₊', '='=>'₌', '('=>'₍', ')'=>'₎', ' '=>' ',
)
|
||||
|
||||
# Map every character of `astring` through `character_map`.  If any character
# has no mapping (or maps to '\0'), give up and return the empty string.
function replace_characters(astring, character_map)
    output = IOBuffer()
    for original in astring
        replacement = get(character_map, original, '\0')
        replacement == '\0' && return ""
        print(output, replacement)
    end
    return String(take!(output))
end
|
||||
|
||||
# `astring` is a (sub)string holding one LaTeX directive as matched by
# search_latex_directive.  Look the directive up in `extra_directives`, then
# in our Unicode table, then in the REPL's LaTeX symbol table, and return the
# substituted text; super/subscripts are handled last.  Directives that are
# not recognized are returned unchanged.
function directive_substitution(astring, extra_directives)
    start_position, directive_end, argument_end = search_latex_directive(astring)
    last_index = endof(astring)
    directive = SubString(astring, start_position, directive_end)

    for table in (extra_directives, latex_unicode, Base.REPLCompletions.latex_symbols)
        haskey(table, directive) || continue
        substitution = table[directive]
        if contains(substitution, "#1")
            # template with an argument slot: substitute the processed argument
            raw_argument = strip_argument(astring, directive_end + 1, last_index)
            argument = strip_argument(replace_directives(raw_argument, extra_directives))
            return replace(substitution, "#1", argument)
        end
        remainder = SubString(astring, directive_end+1, last_index)
        argument = replace_directives(remainder, extra_directives)
        if strwidth(substitution) == 0 # \hat{...} etc: combining chars go after argument
            return string(strip_argument(argument), substitution)
        end
        return string(substitution, argument) # don't strip for 0-arg macros
    end

    if directive == "^" || directive == "_" # super/subscripts
        raw_argument = strip_argument(astring, directive_end + 1, last_index)
        argument = strip_argument(replace_directives(raw_argument, extra_directives))
        dict = directive == "^" ? superscripts : subscripts
        substitution = replace_characters(argument, dict)
        # an empty result means some character has no super/subscript form
        isempty(substitution) || return substitution
    end
    return astring # ignore unrecognized directives
end
|
||||
|
||||
# Run `directive_substitution` on every LaTeX directive found in `astring`
function replace_directives(astring, extra_directives)
    substitute(matched) = directive_substitution(matched, extra_directives)
    return replace(astring, LaTeXDirectiveSearch(), substitute)
end
|
||||
|
||||
# Remove unescaped $ signs from `astring`; an escaped `\$` becomes a plain `$`
function strip_dollars(astring)
    bytes = Vector{UInt8}(astring)
    byte_count = sizeof(astring)
    output = IOBuffer()
    for index in 1:byte_count
        byte = bytes[index]
        # A dollar byte is never copied: either it is unescaped, or its
        # replacement was already written when the preceding backslash was seen.
        byte == DOLLAR && continue
        escapes_dollar = byte == BACKSLASH && index < byte_count && bytes[index + 1] == DOLLAR
        write(output, escapes_dollar ? DOLLAR : byte) # \$ -> $
    end
    return String(take!(output))
end
|
||||
|
||||
"""
    simplify_latex(astring, extra_directives)

Convert the LaTeX string `astring` to "plain text" as far as possible: known
LaTeX directives are stripped or replaced, e.g. by Unicode characters.

`extra_directives` is a dictionary (`String=>String`) mapping LaTeX directives
to their replacements.  The default is `BibTeX.text_directives`, which simply
removes things like bold and italics.  Pass `BibTeX.markdown_directives`
instead to render such directives with Markdown syntax.
"""
function simplify_latex(astring, extra_directives = text_directives)
    without_directives = replace_directives(astring, extra_directives)
    return strip_dollars(without_directives)
end
|
|
@ -0,0 +1,218 @@
|
|||
# Mutable state of the BibTeX parser.
mutable struct Parser{T}
    tokens::T                                     # tokens still to be consumed
    substitutions::Dict{String, String}           # fields collected from `@string` records
    records::Dict{String, Dict{String, String}}   # entry id => (field => value)
    line::Int                                     # line number used in error messages
    bracket_counter::Int                          # brace depth tracked while reading a value
end

# Token type of the parser, and the empty token of that type
function Base.eltype(p::Parser)
    return eltype(p.tokens)
end

function Base.one(p::Parser)
    return eltype(p)("")
end

# Like the default constructor, but the trailing arguments are converted to
# the field types instead of having to match them exactly.
function Parser(tokens::T, substitutions, records, line, bracket_counter) where T
    return Parser{T}(tokens, substitutions, records, line, bracket_counter)
end

# Fresh parser: nothing defined yet, positioned on line 1 outside any braces.
function Parser(tokens)
    no_substitutions = Dict{String, String}()
    no_records = Dict{String, Dict{String, String}}()
    return Parser(tokens, no_substitutions, no_records, 1, 0)
end
|
||||
|
||||
# Split `text` into tokens (runs of ordinary characters, runs of whitespace,
# and each special character on its own) and wrap them in a fresh `Parser`.
function parse_text(text)
    tokens = matchall(r"[^\s\"#{}@,=\\]+|\s+|\"|#|{|}|@|,|=|\\", text)
    return Parser(tokens)
end

# Position suffix for error messages
location(parser) = string("on line ", parser.line)

# A parser is empty once every token has been consumed
function Base.isempty(p::Parser)
    return isempty(p.tokens)
end
|
||||
|
||||
# Remove and return the next token, keeping the line counter up to date.
# Returns the empty token when the input is exhausted; a whitespace token is
# collapsed to a single space.
function next_token_default!(parser)
    isempty(parser.tokens) && return one(parser)
    token = shift!(parser.tokens)
    parser.line += count(character -> character == '\n', token)
    return all(isspace, token) ? eltype(parser)(" ") : token
end
|
||||
|
||||
# Next token, whitespace included.  Running out of input is an error whose
# message names `eol`, the thing that was expected.
function next_token_with_space!(parser, eol = "additional tokens")
    token = next_token_default!(parser)
    token == "" && error("Expected $eol $(location(parser))")
    return token
end

# Next token, skipping one whitespace token if there is one
function next_token!(parser, eol = "additional tokens")
    token = next_token_with_space!(parser, eol)
    all(isspace, token) || return token
    return next_token_with_space!(parser, eol)
end
|
||||
|
||||
# Raise an error unless the token `result` is exactly `eol`
function expect(parser, result, eol)
    if result != eol
        error("Expected $eol $(location(parser))")
    end
    return nothing
end

# Consume the next non-whitespace token and require it to be `eol`
function expect!(parser, eol)
    token = next_token!(parser, eol)
    return expect(parser, token, eol)
end
|
||||
|
||||
# Next token (whitespace included) while tracking the brace nesting depth in
# `parser.bracket_counter`.  A closing brace without a matching opening brace
# is an error.
function token_and_counter!(parser, eol = "}")
    token = next_token_with_space!(parser, eol)
    depth_change = token == "{" ? 1 : (token == "}" ? -1 : 0)
    parser.bracket_counter += depth_change
    parser.bracket_counter < 0 && error("} without corresponding { $(location(parser))")
    return token
end
|
||||
|
||||
# Read one piece of a value and append its tokens to `values`.  A piece is a
# "quoted string", a {braced group}, or a bare token.  Bare tokens that name a
# `@string` abbreviation are replaced by its definition; abbreviation names
# are stored lowercased by `field!`, so the lookup is lowercased as well.
# Unlike `value!`, nothing after the piece is consumed.
function value_part!(parser, values)
    token = next_token!(parser)
    if token == "\""
        token = token_and_counter!(parser, "\"")
        # a quote inside braces does not terminate the string
        while !(token == "\"" && parser.bracket_counter == 0)
            push!(values, token)
            token = token_and_counter!(parser, "\" or }")
        end
    elseif token == "{"
        parser.bracket_counter += 1
        token = token_and_counter!(parser)
        while parser.bracket_counter > 0
            push!(values, token)
            token = token_and_counter!(parser)
        end
    else
        # `get`, not `getkey`: we want the definition, not the name
        push!(values, get(parser.substitutions, lowercase(token), String(token)))
    end
    return values
end

# Read a complete value: one or more pieces joined by `#` (the pieces are
# separated by a single space in the result).  Returns the token that follows
# the value together with the value itself.
value!(parser, values = eltype(parser)[]) = begin
    value_part!(parser, values)
    token = next_token!(parser, ", or }")
    if token == "#"
        push!(values, " ")
        value!(parser, values)
    else
        token, join(values)
    end
end

# Read comma-separated `key = value` fields up to the closing `}` and store
# them in `dict` under lowercased keys.  A repeated key is an error.
field!(parser, dict) = begin
    token = ","
    while token == ","
        token = next_token!(parser, "a new entry or }")
        if token != "}"
            key = lowercase(token)
            if haskey(dict, key)
                error("Duplicated field $key $(location(parser))")
            else
                expect!(parser, "=")
                token, dict[key] = value!(parser)
            end
        end
    end
    expect(parser, token, "}")
end

"""
    parse_bibtex(text)

This is a simple input parser for BibTeX. I had trouble finding a standard
specification, but I've included several features of real BibTeX. Returns
a preamble (or an empty string) and a dict of dicts.

Abbreviations defined with `@string` are expanded (case-insensitively) where
they are used as bare values; pieces joined with `#` are separated by a
single space.

```jldoctest
julia> using BibTeX: parse_bibtex

julia> preamble, result = parse_bibtex(""\"
           @preamble{some instructions}
           @comment blah blah
           @string{short = long}
           @a{b,
             c = {{c} c},
             d = "d {"} d",
             e = f # short
           }
           ""\");

julia> preamble
"some instructions"

julia> result["b"]["__type__"]
"a"

julia> result["b"]["c"]
"{c} c"

julia> result["b"]["d"]
"d {\\"} d"

julia> result["b"]["e"]
"f long"

julia> parse_bibtex("@book")
ERROR: Expected { on line 1
[...]

julia> parse_bibtex("@book@")
ERROR: Expected { on line 1
[...]
```

Repeated fields and keys are not allowed:

```jldoctest
julia> using BibTeX: parse_bibtex

julia> parse_bibtex(""\"
           @book{abook,
           title = A}
           @book{abook,
           title = B}
           ""\")
ERROR: Duplicated id abook on line 3
[...]

julia> parse_bibtex(""\"
           @book{abook,
           title = A,
           title = B}
           ""\")
ERROR: Duplicated field title on line 3
[...]
```
"""
function parse_bibtex(text)
    parser = parse_text(text)
    token = next_token_default!(parser)
    preamble = ""
    while token != ""
        # anything outside a record (i.e. not introduced by `@`) is skipped
        if token == "@"
            record_type = lowercase(next_token!(parser))
            if record_type == "preamble"
                # Read just the preamble's value.  `value!` would also consume
                # the token that follows it, which swallows the `@` of the next
                # record and fails when the preamble ends the input.
                preamble = join(value_part!(parser, eltype(parser)[]))
            elseif record_type != "comment"
                expect!(parser, "{")
                if record_type == "string"
                    field!(parser, parser.substitutions)
                else
                    id = next_token!(parser)
                    records = parser.records
                    if haskey(records, id)
                        error("Duplicated id $id $(location(parser))")
                    else
                        # the record type travels with the fields under "__type__"
                        dict = Dict("__type__" => record_type)
                        expect!(parser, ",")
                        field!(parser, dict)
                        records[id] = dict
                    end
                end
            end
        end
        token = next_token_default!(parser)
    end
    return preamble, parser.records
end
|
|
@ -0,0 +1 @@
|
|||
Documenter
|
|
@ -0,0 +1,6 @@
|
|||
using BenchmarkTools
using BibTeX

# contents of example/examples.bib, located relative to the package root
const file = readstring(joinpath(dirname(dirname(@__FILE__)), "example", "examples.bib"))

@benchmark BibTeX.parse_bibtex(file)
|
|
@ -0,0 +1,58 @@
|
|||
using BibTeX, Base.Test
import Documenter

# root directory of the package (two levels above this file)
package_root = dirname(dirname(@__FILE__))

# Build the documentation as part of the test run; `strict = true` is passed
# so that documentation problems are reported by the build.
Documenter.makedocs(
    modules   = [BibTeX],
    format    = :html,
    sitename  = "BibTeX.jl",
    root      = joinpath(package_root, "docs"),
    pages     = Any["Home" => "index.md"],
    strict    = true,
    linkcheck = true,
    checkdocs = :exports,
    authors   = "Brandon Taylor"
)

@testset "examples.bib" begin
    # note: ".." does not work on windows
    examples_path = joinpath(package_root, "example", "examples.bib")
    b = open(Bibliography, examples_path, "r")
    @test length(b) == 92
    @test (b["angenendt"]::Citation{:article})["date"] == "2002"
end

@testset "small bib" begin
    b = Bibliography("""
        @article{foo, bar=baz}
        @book{bar, foobar=1}
        """)

    # missing keys fall back to the default
    @test get(b, "foobar", nothing) === nothing
    @test get(b["foo"], "blah", nothing) === nothing

    @test string(b["foo"]) == "Citation{:article}(1 entries)"

    # container operations on the bibliography
    Base.rehash!(b)
    b2 = copy(b)
    @test length(b2) == length(b)
    @test isempty(sizehint!(empty!(b2),10))
    @test isempty(similar(b))

    # insertion and lookup
    b2["x"] = Citation{:foo}()
    b2["x"]["bar"] = "blah"
    @test length(b2) == length(b2["x"]) == 1
    @test b2["x"]["bar"] == "blah"
    @test get(b2["x"], "foo", nothing) === nothing
    @test collect(b2)[1][2] == b2["x"]
    @test collect(b2["x"])[1] == ("bar"=>"blah")

    # container operations on a single citation
    Base.rehash!(b2["x"])
    x2 = copy(b2["x"])::Citation{:foo}
    @test length(x2) == 1
    @test isempty(similar(x2))
    @test isempty(sizehint!(empty!(x2),10))
end
|
||||
|
||||
import BibTeX: simplify_latex, markdown_directives
|
||||
@testset "latex" begin
|
||||
@test simplify_latex(raw"foo \$$x_1x_2^\mathrm{3}$ \dot{\alpha} {quote} \% \{unquote\} \emph{bar \textbf{bold}} {\bf baz 2.0} {\^{u}}", markdown_directives) ==
|
||||
"foo \$x₁x₂³ α̇ quote % {unquote} _bar **bold**_ **baz 2.0** û"
|
||||
end
|
Loading…
Reference in New Issue