Compare commits
27 Commits
Author | SHA1 | Date |
---|---|---|
Brandon Taylor | 673e30ce1f | |
bramtayl | 3f6dfb22d1 | |
Steven G. Johnson | 93ecaf5ccf | |
Steven G. Johnson | 26bfe8d705 | |
Steven G. Johnson | ab60bb59d8 | |
Brandon Taylor | 54ecf83c40 | |
Brandon Taylor | 1a8fad9cb5 | |
Brandon Taylor | 54da038df9 | |
Brandon Taylor | 88c6e10e83 | |
Brandon Taylor | 75d1b6d74c | |
Steven G. Johnson | 9c8c04e950 | |
bramtayl | 251f16ce9f | |
Steven G. Johnson | e6c0702811 | |
Steven G. Johnson | 457f4104d4 | |
Steven G. Johnson | 75809edb6b | |
Steven G. Johnson | 85e5456187 | |
bramtayl | b16776190b | |
Brandon Taylor | 6e1e18e89b | |
Brandon Taylor | 1c31cd6795 | |
Brandon Taylor | 352997ae86 | |
Brandon Taylor | 19b3a8804e | |
Brandon Taylor | 687aea8d99 | |
Brandon Taylor | d4d33933d4 | |
Brandon Taylor | 2054013b30 | |
Brandon Taylor | 233386140a | |
Brandon Taylor | 10d0eebc31 | |
Brandon Taylor | e8e0983528 |
|
@ -0,0 +1,3 @@
|
||||||
|
*.jl.cov
|
||||||
|
*.jl.*.cov
|
||||||
|
*.jl.mem
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Documentation: http://docs.travis-ci.com/user/languages/julia/
|
||||||
|
language: julia
|
||||||
|
os:
|
||||||
|
- linux
|
||||||
|
julia:
|
||||||
|
- 0.6
|
||||||
|
- nightly
|
||||||
|
notifications:
|
||||||
|
email: false
|
||||||
|
after_success:
|
||||||
|
# build documentation
|
||||||
|
- julia -e 'cd(Pkg.dir("BibTeX")); Pkg.add("Documenter"); include(joinpath("docs", "make.jl"))'
|
||||||
|
# push coverage results to Codecov
|
||||||
|
- julia -e 'cd(Pkg.dir("BibTeX")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
|
|
@ -0,0 +1,41 @@
|
||||||
|
The BibTeX.jl package is licensed under the MIT "Expat" License:
|
||||||
|
|
||||||
|
|
||||||
|
> Copyright (c) 2017: Brandon Taylor.
|
||||||
|
>
|
||||||
|
>
|
||||||
|
> Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
>
|
||||||
|
> of this software and associated documentation files (the "Software"), to deal
|
||||||
|
>
|
||||||
|
> in the Software without restriction, including without limitation the rights
|
||||||
|
>
|
||||||
|
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
>
|
||||||
|
> copies of the Software, and to permit persons to whom the Software is
|
||||||
|
>
|
||||||
|
> furnished to do so, subject to the following conditions:
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
> The above copyright notice and this permission notice shall be included in all
|
||||||
|
>
|
||||||
|
> copies or substantial portions of the Software.
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
>
|
||||||
|
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
>
|
||||||
|
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
>
|
||||||
|
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
>
|
||||||
|
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
>
|
||||||
|
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
>
|
||||||
|
> SOFTWARE.
|
||||||
|
>
|
||||||
|
>
|
|
@ -0,0 +1,20 @@
|
||||||
|
# BibTeX
|
||||||
|
|
||||||
|
[![travis badge][travis_badge]][travis_url]
|
||||||
|
[![codecov badge][codecov_badge]][codecov_url]
|
||||||
|
|
||||||
|
## Documentation [here][documenter_latest]
|
||||||
|
|
||||||
|
Change documentation link to `documenter_stable` once published!
|
||||||
|
|
||||||
|
[travis_badge]: https://travis-ci.org/bramtayl/BibTeX.jl.svg?branch=master
|
||||||
|
[travis_url]: https://travis-ci.org/bramtayl/BibTeX.jl
|
||||||
|
|
||||||
|
[appveyor_badge]: https://ci.appveyor.com/api/projects/status/github/bramtayl/BibTeX.jl?svg=true&branch=master
|
||||||
|
[appveyor_url]: https://ci.appveyor.com/project/bramtayl/bibtex-jl
|
||||||
|
|
||||||
|
[codecov_badge]: http://codecov.io/github/bramtayl/BibTeX.jl/coverage.svg?branch=master
|
||||||
|
[codecov_url]: http://codecov.io/github/bramtayl/BibTeX.jl?branch=master
|
||||||
|
|
||||||
|
[documenter_stable]: https://bramtayl.github.io/BibTeX.jl/stable
|
||||||
|
[documenter_latest]: https://bramtayl.github.io/BibTeX.jl/latest
|
|
@ -0,0 +1,26 @@
|
||||||
|
environment:
|
||||||
|
matrix:
|
||||||
|
- JULIAVERSION: "julialang/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
|
||||||
|
- JULIAVERSION: "julialang/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
|
||||||
|
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
|
||||||
|
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- master
|
||||||
|
- /release-.*/
|
||||||
|
notifications:
|
||||||
|
- provider: Email
|
||||||
|
on_build_success: false
|
||||||
|
on_build_failure: false
|
||||||
|
on_build_status_changed: false
|
||||||
|
install:
|
||||||
|
- ps: (new-object net.webclient).DownloadFile(
|
||||||
|
$("http://s3.amazonaws.com/"+$env:JULIAVERSION),
|
||||||
|
"C:\projects\julia-binary.exe")
|
||||||
|
- C:\projects\julia-binary.exe /S /D=C:\projects\julia
|
||||||
|
build_script:
|
||||||
|
- IF EXIST .git\shallow (git fetch --unshallow)
|
||||||
|
- C:\projects\julia\bin\julia -e "versioninfo();
|
||||||
|
Pkg.clone(pwd(), \"BibTeX\"); Pkg.build(\"BibTeX\")"
|
||||||
|
test_script:
|
||||||
|
- C:\projects\julia\bin\julia -e "Pkg.test(\"BibTeX\")"
|
|
@ -0,0 +1,2 @@
|
||||||
|
build/
|
||||||
|
site/
|
|
@ -0,0 +1,8 @@
|
||||||
|
import Documenter
|
||||||
|
|
||||||
|
Documenter.deploydocs(
|
||||||
|
repo = "github.com/bramtayl/BibTeX.jl.git",
|
||||||
|
target = "build",
|
||||||
|
deps = nothing,
|
||||||
|
make = nothing
|
||||||
|
)
|
|
@ -0,0 +1,8 @@
|
||||||
|
# BibTeX.jl
|
||||||
|
|
||||||
|
```@index
|
||||||
|
```
|
||||||
|
|
||||||
|
```@autodocs
|
||||||
|
Modules = [BibTeX]
|
||||||
|
```
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,9 @@
|
||||||
|
module BibTeX

# Public API of the package.
export Bibliography
export Citation

# Source files are included in dependency order: `Bibliography` is declared in
# terms of `Citation`, so the citation definitions must be loaded first.
include("parser.jl")
include("citation.jl")
include("bibliography.jl")
include("latex.jl")

end
|
|
@ -0,0 +1,39 @@
|
||||||
|
# A parsed bibliography: the preamble text together with the entries, keyed by
# citation id. Subtyping `Associative` makes it usable like a dictionary.
struct Bibliography <: Associative{String,Citation}
    preamble::String
    data::Dict{String,Citation}
end

"""
    Bibliography(bibtex::String)
    Bibliography(io::IO)

Parse bibtex-format bibliography data, given either as a string or as an IO
stream, and return a `Dict`-like object `b::Bibliography` that maps citation
ids (strings) to bibliography items of type [`Citation`](@ref).
"""
function Bibliography(bibtex::String)
    preamble, records = parse_bibtex(bibtex)
    entries = Dict{String,Citation}()
    for (id, fields) in records
        # `Citation!` removes the "__type__" field from `fields` and uses it
        # as the citation's type parameter.
        entries[id] = Citation!(fields)
    end
    return Bibliography(preamble, entries)
end

# Read the whole stream, then parse it as a string.
function Bibliography(io::IO)
    return Bibliography(readstring(io))
end

# `open(Bibliography, filename, ...)` opens the file and parses its contents.
function Base.open(::Type{Bibliography}, args...)
    return open(args...) do io
        Bibliography(io)
    end
end
|
||||||
|
|
||||||
|
# Dictionary interface for `Bibliography`: every operation is forwarded to the
# wrapped `data` dictionary, and mutating operations return the bibliography
# itself so that calls can be chained.

# An empty bibliography (no preamble, no entries).
Base.similar(b::Bibliography) = Bibliography("", Dict{String,Citation}())

function Base.rehash!(b::Bibliography, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Bibliography, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Bibliography)
    empty!(b.data)
    return b
end

# Shallow copy: the entry dictionary is copied, the citations are shared.
Base.copy(b::Bibliography) = Bibliography(b.preamble, copy(b.data))

function Base.setindex!(b::Bibliography, v::Citation, k::AbstractString)
    b.data[String(k)] = v
    return b
end

Base.get(b::Bibliography, k::AbstractString, default) = get(b.data, String(k), default)

# Explicit lookup by key, mirroring the `getindex` method defined for
# `Citation`; throws a `KeyError` when `k` is not a known citation id.
Base.getindex(b::Bibliography, k::AbstractString) = getindex(b.data, String(k))

# Iteration protocol and length, delegated to the underlying dictionary.
Base.start(b::Bibliography) = start(b.data)
Base.done(b::Bibliography, i) = done(b.data, i)
Base.next(b::Bibliography, i) = next(b.data, i)
Base.length(b::Bibliography) = length(b.data)
|
||||||
|
|
||||||
|
# todo: add specialized Base.show methods for MIME"text/bibtex" etc.
|
|
@ -0,0 +1,42 @@
|
||||||
|
"""
    Citation{S}(data::Dict{String,String})

One item of a BibTeX database, stored as a dictionary from field names to
field values. The type parameter `S` is a symbol naming the kind of item
(`:article` and so on). A `b::Citation` supports `b[key]` to retrieve a
field and, more generally, behaves like a dictionary from `String` to
`String`.
"""
struct Citation{S} <: Associative{String,String}
    data::Dict{String,String}
end

# A citation of type `S` without any fields.
function Citation{S}() where {S}
    return Citation{S}(Dict{String,String}())
end
|
||||||
|
|
||||||
|
# Build a `Citation` from a raw field dictionary. The "__type__" entry is
# removed from `data` (hence the `!`) and becomes the type parameter; `pop!`
# throws a `KeyError` if that entry is absent.
function Citation!(data::Dict{String,String})
    entry_type = pop!(data, "__type__")
    return Citation{Symbol(entry_type)}(data)
end
|
||||||
|
|
||||||
|
# Dictionary interface for `Citation`: operations are forwarded to the wrapped
# `data` dictionary; mutating operations return the citation itself.

# An empty citation of the same type.
Base.similar(b::Citation{S}) where {S} = Citation{S}()

function Base.rehash!(b::Citation, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Citation, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Citation)
    empty!(b.data)
    return b
end

function Base.copy(b::Citation{S}) where {S}
    return Citation{S}(copy(b.data))
end

# Keys are converted to `String` so that any `AbstractString` can be used.
function Base.get(b::Citation, k::AbstractString, default)
    return get(b.data, String(k), default)
end

function Base.getindex(b::Citation, k::AbstractString)
    return getindex(b.data, String(k))
end

function Base.setindex!(b::Citation, v::AbstractString, k::AbstractString)
    b.data[String(k)] = String(v)
    return b
end

# Iteration protocol and length, delegated to the underlying dictionary.
Base.start(b::Citation) = start(b.data)
Base.done(b::Citation, i) = done(b.data, i)
Base.next(b::Citation, i) = next(b.data, i)
Base.length(b::Citation) = length(b.data)
|
||||||
|
|
||||||
|
# Compact one-line display, e.g. `Citation{:article}(3 entries)`.
# Written with `where {S}` like the other parametric methods in this file,
# instead of the deprecated `f{S}(...)` method syntax.
function Base.show(io::IO, b::Citation{S}) where {S}
    print(io, "Citation{:$S}(", length(b), " entries)")
end
|
||||||
|
|
||||||
|
# TODO: add Base.show text/plain and text/markdown for formatted citation
|
|
@ -0,0 +1,361 @@
|
||||||
|
# conversion of LaTeX directives to plain text, markdown, etc.
|
||||||
|
#
|
||||||
|
# The basic idea is that we search for `\foo{argument}`, `{\foo argument}`,
|
||||||
|
# or `{\foo{argument}}`, and look up `foo` in a dictionary of substitutions
|
||||||
|
# like `\textit` -> `*#1*` where #1 is where the (first) argument is
|
||||||
|
# substituted. Then we have separate dictionary entries for text/plain,
|
||||||
|
# text/markdown, etcetera.
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
# parsing LaTeX directives:
|
||||||
|
|
||||||
|
# Byte values of the ASCII characters that the LaTeX scanner looks for.
const BACKSLASH = UInt8('\\')
const BRACE_OPEN = UInt8('{')
const BRACE_CLOSE = UInt8('}')
const SPACE = UInt8(' ')
const DOLLAR = UInt8('$')
const CARET = UInt8('^')
const UNDERSCORE = UInt8('_')

# ASCII-only character classes, operating on raw bytes.
is_letter(x::UInt8) = UInt8('a') ≤ x ≤ UInt8('z') || UInt8('A') ≤ x ≤ UInt8('Z')
is_alphanumeric(x::UInt8) = UInt8('0') ≤ x ≤ UInt8('9') || is_letter(x)

"""
    search_latex_directive(astring, start_position = 1)

Search for a LaTeX directive `\\directive{argument}` or similar in `astring`,
starting at byte index `start_position`. Return
`(directive_start, directive_end, argument_end)` such that
`astring[directive_start:directive_end]` gives `\\directive` and
`astring[directive_end+1:argument_end]` gives `{argument}`. Use
[`strip_argument`](@ref) to remove surrounding braces and whitespace
from the `argument`.

A bare `{...}` group is reported as an empty directive
(`directive_end == directive_start - 1`), and `^` / `_` are treated as
one-character directives. `(0, 0, 0)` is returned when nothing is found.
"""
function search_latex_directive(astring, start_position = 1)
    string_length = sizeof(astring)
    if !(0 < start_position ≤ string_length)
        return 0, 0, 0
    end
    character_vector = Vector{UInt8}(astring)
    index = start_position
    all_spaces = true

    # find \foo directive or {...}:
    character = UInt8(0)
    while index ≤ string_length
        character = character_vector[index]
        if (character == BACKSLASH || character == BRACE_OPEN ||
            character == CARET || character == UNDERSCORE)
            break
        end
        if character != SPACE
            all_spaces = false
        end
        index += 1
    end

    if index ≤ string_length && character != BRACE_OPEN
        directive_start = index
        if character == BACKSLASH
            index += 2
            if index - 1 > string_length
                # lone backslash at the very end of the string
                return 0, 0, 0
            end
            # `\` followed by letters is a named directive; `\` followed by
            # any other character is a one-character escape such as `\%`.
            if is_letter(character_vector[index - 1])
                while index ≤ string_length && is_letter(character_vector[index])
                    index += 1
                end
            end
            directive_end = index - 1
        else
            directive_end = directive_start # ^ or _
            index += 1
        end

        # look for optional opening brace
        while index ≤ string_length && character_vector[index] == SPACE
            index += 1
        end
        if index > string_length
            return directive_start, directive_end, string_length
        end
        in_braces = character_vector[index] == BRACE_OPEN
        if !in_braces
            # search backwards from \foo to look for { \foo ...}
            backwards_index = directive_start - 1
            while backwards_index ≥ start_position && character_vector[backwards_index] == SPACE
                backwards_index -= 1
            end
            if backwards_index < start_position || character_vector[backwards_index] != BRACE_OPEN
                if character_vector[index] == BACKSLASH
                    # argument is another latex directive
                    inner_argument_end = search_latex_directive(astring, index)[3]
                    if inner_argument_end == 0
                        # The nested search fails on a trailing lone backslash;
                        # treat the rest of the string as the argument instead
                        # of reporting an end position of 0.
                        inner_argument_end = string_length
                    end
                    return directive_start, directive_end, inner_argument_end
                elseif character != BACKSLASH
                    # in an equation, token is a single char
                    return directive_start, directive_end, index
                elseif all_spaces
                    # if `\directive ...` was preceded only
                    # by whitespace, then assume arguments
                    # extend to the end of the string.  This
                    # happens when we recurse on `{\directive ...}`.
                    return directive_start, directive_end, string_length
                else
                    # argument is not in braces … get next token
                    while index ≤ string_length && is_alphanumeric(character_vector[index])
                        index += 1
                    end
                    return directive_start, directive_end, index - 1
                end
            end
        end
        index += 1
    elseif index > string_length
        return 0, 0, 0
    else # { ... }
        directive_start = index
        directive_end = index - 1
        in_braces = true
        index += 1
    end

    # search for end of argument (closing brace)
    number_of_braces = 1
    while index ≤ string_length
        character = character_vector[index]
        if character == BRACE_OPEN
            number_of_braces += 1
        elseif character == BRACE_CLOSE
            number_of_braces -= 1
            if number_of_braces == 0
                # For `\foo{...}` the closing brace is part of the argument;
                # for `{\foo ...}` it belongs to the enclosing group.
                argument_end = in_braces ? index : index - 1
                return directive_start, directive_end, argument_end
            end
        end
        index += 1
    end
    # unbalanced braces: the argument runs to the end of the string
    return directive_start, directive_end, string_length
end
|
||||||
|
|
||||||
|
"""
    strip_argument(astring, start_position = start(astring), end_position = endof(astring))

Return the part of `astring` between `start_position` and `end_position` as a
`SubString`, with the enclosing braces (if the range ends in `}`) and any
leading/trailing spaces removed. An inverted range yields an empty substring;
positions outside the string throw a `BoundsError`.
"""
function strip_argument(astring, start_position = start(astring), end_position = endof(astring))
    # inverted range: nothing to strip
    start_position > end_position && return SubString(astring, 1, 0)

    last_index = endof(astring)
    if start_position < 1 || start_position > last_index ||
       end_position < 1 || end_position > last_index
        throw(BoundsError())
    end

    bytes = Vector{UInt8}(astring)
    if bytes[end_position] == BRACE_CLOSE
        end_position -= 1 # omit closing brace
        # advance to the matching opening brace
        while start_position ≤ end_position && bytes[start_position] != BRACE_OPEN
            start_position += 1
        end
        start_position > end_position && error("malformed argument")
        start_position += 1 # omit opening brace
    end

    # trim spaces on the right, then on the left
    while start_position ≤ end_position && bytes[end_position] == SPACE
        end_position -= 1
    end
    while start_position ≤ end_position && bytes[start_position] == SPACE
        start_position += 1
    end
    return SubString(astring, start_position, end_position)
end
|
||||||
|
|
||||||
|
# `replace` locates matches through `search`. Defining a marker type and a
# `search` method for it is therefore enough to let `replace` operate on the
# directives found by `search_latex_directive`.
struct LaTeXDirectiveSearch end

# Return the byte range covering the next directive and its argument at or
# after `index`, or the empty range `0:-1` if there is none.
function Base.search(s::AbstractString, ::LaTeXDirectiveSearch, index)
    found = search_latex_directive(s, index)
    first_byte, last_byte = found[1], found[3]
    return first_byte < index ? (0:-1) : (first_byte:last_byte)
end
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Unicode substitutions for LaTeX directives
|
||||||
|
const latex_unicode = Dict(
|
||||||
|
# accent escapes like `\"u` for `ü`, from the list at
|
||||||
|
# https://en.wikibooks.org/wiki/LaTeX/Special_Characters
|
||||||
|
# converted to Unicode characters (mostly combining marks)
|
||||||
|
"\\`" => "#1\u0300",
|
||||||
|
"\\'" => "#1\u0301",
|
||||||
|
"\\^" => "#1\u0302",
|
||||||
|
"\\\"" => "#1\u0308",
|
||||||
|
"\\H" => "#1\u030b",
|
||||||
|
"\\~" => "#1\u0303",
|
||||||
|
"\\c" => "#1\u0327",
|
||||||
|
"\\k" => "#1\u0328",
|
||||||
|
"\\l" => "\u0142",
|
||||||
|
"\\=" => "#1\u0304",
|
||||||
|
"\\b" => "#1\u0331",
|
||||||
|
"\\." => "#1\u0307",
|
||||||
|
"\\d" => "#1\u0323",
|
||||||
|
"\\r" => "#1\u030a",
|
||||||
|
"\\u" => "#1\u0306",
|
||||||
|
"\\v" => "#1\u030c",
|
||||||
|
"\\t" => "#1\u0361", # fixme: u+0361 should go after first char in #1
|
||||||
|
"\\o" => "\u00f8",
|
||||||
|
"\\i" => "\u0131",
|
||||||
|
"\\j" => "\u0237",
|
||||||
|
|
||||||
|
# other backslash escapes
|
||||||
|
"\\\\" => "\\",
|
||||||
|
"\\{" => "{", "\\}" => "}",
|
||||||
|
"\\%" => "%",
|
||||||
|
# "\\\$" => "\$" -- dollar signs will be unescaped in strip_dollars
|
||||||
|
|
||||||
|
# We parse {....} quoting as an empty directive:
|
||||||
|
"" => "#1",
|
||||||
|
|
||||||
|
# many other substitutions can be found in
|
||||||
|
# Base.REPLCompletions.latex_symbols
|
||||||
|
)
|
||||||
|
|
||||||
|
# LaTeX directives converted to Markdown
|
||||||
|
const markdown_directives = Dict(
|
||||||
|
"\\emph" => "_#1_",
|
||||||
|
"\\textit" => "_#1_",
|
||||||
|
"\\it" => "_#1_",
|
||||||
|
"\\mathit" => "_#1_",
|
||||||
|
"\\textbf" => "**#1**",
|
||||||
|
"\\bf" => "**#1**",
|
||||||
|
"\\mathbf" => "**#1**",
|
||||||
|
"\\texttt" => "`#1`",
|
||||||
|
"\\mathrm" => "#1",
|
||||||
|
"\\url" => "[#1](#1)",
|
||||||
|
"\\sout" => "~~#1~~",
|
||||||
|
"\\st" => "~~#1~~",
|
||||||
|
"\\cancel" => "~~#1~~",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Directives that are stripped when converting to text/plain: only the
# argument is kept. This covers the same directives as `markdown_directives`,
# including the strike-through ones, so that no markup is left behind in
# plain-text output.
const text_directives = Dict(
    "\\emph" => "#1",
    "\\textit" => "#1",
    "\\it" => "#1",
    "\\mathit" => "#1",
    "\\textbf" => "#1",
    "\\bf" => "#1",
    "\\mathbf" => "#1",
    "\\texttt" => "#1",
    "\\mathrm" => "#1",
    "\\url" => "#1",
    "\\sout" => "#1",
    "\\st" => "#1",
    "\\cancel" => "#1",
)
|
||||||
|
|
||||||
|
# Unicode includes an incomplete set of super/subscript characters:
|
||||||
|
const superscripts = Dict(
|
||||||
|
'0'=>'⁰', '1'=>'¹', '2'=>'²', '3'=>'³', '4'=>'⁴', '5'=>'⁵', '6'=>'⁶', '7'=>'⁷', '8'=>'⁸', '9'=>'⁹',
|
||||||
|
'a'=>'ᵃ', 'b'=>'ᵇ', 'c'=>'ᶜ', 'd'=>'ᵈ', 'e'=>'ᵉ', 'f'=>'ᶠ', 'g'=>'ᵍ', 'h'=>'ʰ',
|
||||||
|
'i'=>'ⁱ', 'j'=>'ʲ', 'k'=>'ᵏ', 'l'=>'ˡ', 'm'=>'ᵐ', 'n'=>'ⁿ', 'o'=>'ᵒ', 'p'=>'ᵖ',
|
||||||
|
'r'=>'ʳ', 's'=>'ˢ', 't'=>'ᵗ', 'u'=>'ᵘ', 'v'=>'ᵛ', 'w'=>'ʷ', 'x'=>'ˣ', 'y'=>'ʸ', 'z'=>'ᶻ',
|
||||||
|
'A'=>'ᴬ', 'B'=>'ᴮ', 'C'=>'ᶜ', 'D'=>'ᴰ', 'E'=>'ᴱ', 'G'=>'ᴳ', 'H'=>'ᴴ', 'I'=>'ᴵ', 'J'=>'ᴶ',
|
||||||
|
'K'=>'ᴷ', 'L'=>'ᴸ', 'M'=>'ᴹ', 'N'=>'ᴺ', 'O'=>'ᴼ', 'P'=>'ᴾ', 'R'=>'ᴿ', 'S'=>'ˢ', 'T'=>'ᵀ',
|
||||||
|
'U'=>'ᵁ', 'V'=>'ⱽ', 'W'=>'ᵂ', 'β'=>'ᵝ', 'γ'=>'ᵞ', 'δ'=>'ᵟ', 'ψ'=>'ᵠ', 'χ'=>'ᵡ', 'Θ'=>'ᶿ',
|
||||||
|
'+'=>'⁺', '-'=>'⁻', '='=>'⁼', '('=>'⁽', ')'=>'⁾', ' '=>' ', '∘'=>'°',
|
||||||
|
)
|
||||||
|
const subscripts = Dict(
|
||||||
|
'0'=>'₀', '1'=>'₁', '2'=>'₂', '3'=>'₃', '4'=>'₄', '5'=>'₅', '6'=>'₆', '7'=>'₇', '8'=>'₈', '9'=>'₉',
|
||||||
|
'a'=>'ₐ', 'e'=>'ₑ', 'h'=>'ₕ', 'i'=>'ᵢ', 'j'=>'ⱼ', 'k'=>'ₖ', 'l'=>'ₗ', 'm'=>'ₘ',
|
||||||
|
'n'=>'ₙ', 'o'=>'ₒ', 'p'=>'ₚ', 'r'=>'ᵣ', 's'=>'ₛ', 't'=>'ₜ', 'u'=>'ᵤ', 'v'=>'ᵥ', 'x'=>'ₓ',
|
||||||
|
'β'=>'ᵦ', 'γ'=>'ᵧ', 'ρ'=>'ᵨ', 'ψ'=>'ᵩ', 'χ'=>'ᵪ',
|
||||||
|
'-'=>'₋', '+'=>'₊', '='=>'₌', '('=>'₍', ')'=>'₎', ' '=>' ',
|
||||||
|
)
|
||||||
|
|
||||||
|
# Map every character of `astring` through `character_map` and return the
# resulting string. If any character has no mapping (or maps to '\0', which
# is used as the "missing" marker), the empty string is returned instead.
function replace_characters(astring, character_map)
    output = IOBuffer()
    for original in astring
        replacement = get(character_map, original, '\0')
        replacement == '\0' && return ""
        print(output, replacement)
    end
    return String(take!(output))
end
|
||||||
|
|
||||||
|
# Given a (sub)string `astring` that represents a LaTeX directive matched by
# `search_latex_directive`, apply the substitution registered for it. The
# tables are consulted in order: `extra_directives`, our Unicode table, then
# the REPL's LaTeX symbol table. Unrecognized directives are returned
# unchanged.
function directive_substitution(astring, extra_directives)
    found = search_latex_directive(astring)
    name_start, name_end = found[1], found[2]
    last_index = endof(astring)
    directive = SubString(astring, name_start, name_end)

    # The directive's argument with nested directives replaced and the
    # surrounding braces/whitespace removed.
    expanded_argument() = strip_argument(replace_directives(
        strip_argument(astring, name_end + 1, last_index), extra_directives))

    for table in (extra_directives, latex_unicode, Base.REPLCompletions.latex_symbols)
        haskey(table, directive) || continue
        template = table[directive]
        if contains(template, "#1")
            return replace(template, "#1", expanded_argument())
        end
        remainder = replace_directives(SubString(astring, name_end + 1, last_index), extra_directives)
        if strwidth(template) == 0 # \hat{...} etc: combining chars go after argument
            return string(strip_argument(remainder), template)
        end
        return string(template, remainder) # don't strip for 0-arg macros
    end

    if directive == "^" || directive == "_" # super/subscripts
        script_table = directive == "^" ? superscripts : subscripts
        converted = replace_characters(expanded_argument(), script_table)
        isempty(converted) || return converted
    end

    return astring # ignore unrecognized directives
end
|
||||||
|
|
||||||
|
# Replace every LaTeX directive in `astring` by the result of
# `directive_substitution`.
function replace_directives(astring, extra_directives)
    substitute(matched) = directive_substitution(matched, extra_directives)
    return replace(astring, LaTeXDirectiveSearch(), substitute)
end
|
||||||
|
|
||||||
|
# Remove unescaped `$` signs from `astring`; an escaped `\$` becomes a
# literal `$`.
function strip_dollars(astring)
    bytes = Vector{UInt8}(astring)
    nbytes = sizeof(astring)
    output = IOBuffer()
    for position in 1:nbytes
        byte = bytes[position]
        escapes_dollar = byte == BACKSLASH && position < nbytes && bytes[position + 1] == DOLLAR
        if escapes_dollar
            # emit the dollar now; the `$` byte itself is dropped on the
            # next iteration like any other dollar sign
            write(output, DOLLAR) # \$ -> $
        elseif byte != DOLLAR
            write(output, byte)
        end
    end
    return String(take!(output))
end
|
||||||
|
|
||||||
|
"""
    simplify_latex(astring, extra_directives)

Convert the LaTeX string `astring` to "plain text" where possible, by
replacing known LaTeX directives with e.g. their Unicode equivalents and
removing math-mode dollar signs.

`extra_directives` is a dictionary (`String=>String`) from LaTeX directives to
replacements. The default, `BibTeX.text_directives`, simply strips things like
bold and italics. Pass `BibTeX.markdown_directives` instead to express such
directives in Markdown syntax.
"""
function simplify_latex(astring, extra_directives = text_directives)
    without_directives = replace_directives(astring, extra_directives)
    return strip_dollars(without_directives)
end
|
|
@ -0,0 +1,218 @@
|
||||||
|
# Parser state: the remaining tokens, the `@string` substitutions and records
# collected so far, the current line number (used in error messages) and the
# current brace nesting depth.
mutable struct Parser{T}
    tokens::T
    substitutions::Dict{String, String}
    records::Dict{String, Dict{String, String}}
    line::Int
    bracket_counter::Int
end

# The token type, and the empty token that signals "no more input".
Base.eltype(p::Parser) = eltype(p.tokens)
function Base.one(p::Parser)
    token_type = eltype(p)
    return token_type("")
end

function Parser(tokens::T, substitutions, records, line, bracket_counter) where T
    return Parser{T}(tokens, substitutions, records, line, bracket_counter)
end

# A fresh parser positioned on line 1, outside of any braces.
function Parser(tokens)
    substitutions = Dict{String, String}()
    records = Dict{String, Dict{String, String}}()
    return Parser(tokens, substitutions, records, 1, 0)
end

# Split `text` into tokens: runs of ordinary characters, runs of whitespace,
# and each special character on its own.
function parse_text(text)
    token_pattern = r"[^\s\"#{}@,=\\]+|\s+|\"|#|{|}|@|,|=|\\"
    return Parser(matchall(token_pattern, text))
end

# Human-readable position, appended to error messages.
function location(parser)
    return "on line $(parser.line)"
end

function Base.isempty(p::Parser)
    return isempty(p.tokens)
end
|
||||||
|
|
||||||
|
# Remove and return the next token, keeping the line counter up to date.
# Whitespace tokens are normalized to a single space; at the end of input the
# empty token `one(parser)` is returned.
function next_token_default!(parser)
    isempty(parser.tokens) && return one(parser)
    token = shift!(parser.tokens)
    parser.line += count(c -> c == '\n', token)
    return all(isspace, token) ? eltype(parser)(" ") : token
end

# Like `next_token_default!`, but running out of input is an error; `eol`
# describes what was expected.
function next_token_with_space!(parser, eol = "additional tokens")
    token = next_token_default!(parser)
    token == "" && error("Expected $eol $(location(parser))")
    return token
end

# Next token, skipping over one whitespace token if present.
function next_token!(parser, eol = "additional tokens")
    token = next_token_with_space!(parser, eol)
    return all(isspace, token) ? next_token_with_space!(parser, eol) : token
end
|
||||||
|
|
||||||
|
# Raise an error unless `result` is the expected token `eol`.
function expect(parser, result, eol)
    result == eol || error("Expected $eol $(location(parser))")
    return nothing
end

# Read the next non-space token and require it to be `eol`.
function expect!(parser, eol)
    token = next_token!(parser, eol)
    return expect(parser, token, eol)
end

# Read the next token (whitespace included) while tracking brace nesting.
# A closing brace without a matching opening brace is an error.
function token_and_counter!(parser, eol = "}")
    token = next_token_with_space!(parser, eol)
    if token == "{"
        parser.bracket_counter += 1
    elseif token == "}"
        parser.bracket_counter -= 1
    end
    parser.bracket_counter < 0 && error("} without corresponding { $(location(parser))")
    return token
end
|
||||||
|
|
||||||
|
# Parse one field value and return `(next_token, value)`, where `next_token`
# is the first non-space token following the value.
#
# A value is a "quoted" string, a {braced} group, or a bare word. Bare words
# are looked up among the `@string` definitions (which `field!` stores under
# lower-cased names) and are kept verbatim when no definition exists. Values
# joined by `#` are concatenated, separated by a single space.
function value!(parser, values = eltype(parser)[])
    token = next_token!(parser)
    if token == "\""
        token = token_and_counter!(parser, "\"")
        # a quote inside braces does not terminate the string
        while !(token == "\"" && parser.bracket_counter == 0)
            push!(values, token)
            token = token_and_counter!(parser, "\" or }")
        end
    elseif token == "{"
        parser.bracket_counter += 1
        token = token_and_counter!(parser)
        while parser.bracket_counter > 0
            push!(values, token)
            token = token_and_counter!(parser)
        end
    else
        # `get` returns the substitution text; `getkey` would merely return
        # the key itself and leave the macro unexpanded.
        push!(values, get(parser.substitutions, lowercase(token), String(token)))
    end
    token = next_token!(parser, ", or }")
    if token == "#"
        push!(values, " ")
        return value!(parser, values)
    else
        return token, join(values)
    end
end

# Parse a comma-separated list of `key = value` fields into `dict`, up to and
# including the closing `}`. Keys are lower-cased; repeating a key is an error.
function field!(parser, dict)
    token = ","
    while token == ","
        token = next_token!(parser, "a new entry or }")
        if token != "}"
            key = lowercase(token)
            if haskey(dict, key)
                error("Duplicated field $key $(location(parser))")
            else
                expect!(parser, "=")
                token, dict[key] = value!(parser)
            end
        end
    end
    return expect(parser, token, "}")
end

"""
    parse_bibtex(text)

This is a simple input parser for BibTeX. I had trouble finding a standard
specification, but I've included several features of real BibTeX. Returns
a preamble (or an empty string) and a dict of dicts.

```jldoctest
julia> using BibTeX: parse_bibtex

julia> preamble, result = parse_bibtex(""\"
       @preamble{some instructions}
       @comment blah blah
       @string{short = long}
       @a{b,
       c = {{c} c},
       d = "d {"} d",
       e = f # short
       }
       ""\");

julia> preamble
"some instructions"

julia> result["b"]["__type__"]
"a"

julia> result["b"]["c"]
"{c} c"

julia> result["b"]["d"]
"d {\\"} d"

julia> result["b"]["e"]
"f long"

julia> parse_bibtex("@book")
ERROR: Expected { on line 1
[...]

julia> parse_bibtex("@book@")
ERROR: Expected { on line 1
[...]
```

Repeated fields and keys are not allowed:

```jldoctest
julia> using BibTeX: parse_bibtex

julia> parse_bibtex(""\"
       @book{abook,
       title = A}
       @book{abook,
       title = B}
       ""\")
ERROR: Duplicated id abook on line 3
[...]

julia> parse_bibtex(""\"
       @book{abook,
       title = A,
       title = B}
       ""\")
ERROR: Duplicated field title on line 3
[...]
```
"""
function parse_bibtex(text)
    parser = parse_text(text)
    token = next_token_default!(parser)
    preamble = ""
    while token != ""
        if token == "@"
            record_type = lowercase(next_token!(parser))
            if record_type == "preamble"
                lookahead, preamble = value!(parser)
                # `value!` reads one token past the value. If that token
                # starts the next entry, hand it back so that the entry is
                # not silently skipped.
                # NOTE(review): a preamble that is the very last item of the
                # input still raises "Expected , or }" inside `value!`.
                if lookahead == "@"
                    unshift!(parser.tokens, lookahead)
                end
            elseif record_type != "comment"
                expect!(parser, "{")
                if record_type == "string"
                    field!(parser, parser.substitutions)
                else
                    id = next_token!(parser)
                    records = parser.records
                    if haskey(records, id)
                        error("Duplicated id $id $(location(parser))")
                    else
                        dict = Dict("__type__" => record_type)
                        expect!(parser, ",")
                        field!(parser, dict)
                        records[id] = dict
                    end
                end
            end
        end
        # everything outside of an `@` entry is ignored
        token = next_token_default!(parser)
    end
    return preamble, parser.records
end
|
|
@ -0,0 +1 @@
|
||||||
|
Documenter
|
|
@ -0,0 +1,6 @@
|
||||||
|
using BenchmarkTools
using BibTeX

# Contents of the example bibliography located in the `example` directory,
# two levels above this script.
const file = readstring(joinpath(dirname(dirname(@__FILE__)), "example", "examples.bib"))

@benchmark BibTeX.parse_bibtex(file)
|
|
@ -0,0 +1,58 @@
|
||||||
|
using BibTeX, Base.Test
|
||||||
|
|
||||||
|
base_file = dirname(dirname(@__FILE__))
|
||||||
|
|
||||||
|
import Documenter
|
||||||
|
Documenter.makedocs(
|
||||||
|
modules = [BibTeX],
|
||||||
|
format = :html,
|
||||||
|
sitename = "BibTeX.jl",
|
||||||
|
root = joinpath(base_file, "docs"),
|
||||||
|
pages = Any["Home" => "index.md"],
|
||||||
|
strict = true,
|
||||||
|
linkcheck = true,
|
||||||
|
checkdocs = :exports,
|
||||||
|
authors = "Brandon Taylor"
|
||||||
|
)
|
||||||
|
|
||||||
|
@testset "examples.bib" begin
|
||||||
|
# note: ".." does not work on windows
|
||||||
|
b = open(Bibliography, joinpath(base_file, "example", "examples.bib"), "r")
|
||||||
|
@test length(b) == 92
|
||||||
|
@test (b["angenendt"]::Citation{:article})["date"] == "2002"
|
||||||
|
end
|
||||||
|
|
||||||
|
@testset "small bib" begin
|
||||||
|
b = Bibliography("""
|
||||||
|
@article{foo, bar=baz}
|
||||||
|
@book{bar, foobar=1}
|
||||||
|
""")
|
||||||
|
@test get(b, "foobar", nothing) === nothing
|
||||||
|
@test get(b["foo"], "blah", nothing) === nothing
|
||||||
|
|
||||||
|
@test string(b["foo"]) == "Citation{:article}(1 entries)"
|
||||||
|
|
||||||
|
Base.rehash!(b)
|
||||||
|
b2 = copy(b)
|
||||||
|
@test length(b2) == length(b)
|
||||||
|
@test isempty(sizehint!(empty!(b2),10))
|
||||||
|
@test isempty(similar(b))
|
||||||
|
b2["x"] = Citation{:foo}()
|
||||||
|
b2["x"]["bar"] = "blah"
|
||||||
|
@test length(b2) == length(b2["x"]) == 1
|
||||||
|
@test b2["x"]["bar"] == "blah"
|
||||||
|
@test get(b2["x"], "foo", nothing) === nothing
|
||||||
|
@test collect(b2)[1][2] == b2["x"]
|
||||||
|
@test collect(b2["x"])[1] == ("bar"=>"blah")
|
||||||
|
Base.rehash!(b2["x"])
|
||||||
|
x2 = copy(b2["x"])::Citation{:foo}
|
||||||
|
@test length(x2) == 1
|
||||||
|
@test isempty(similar(x2))
|
||||||
|
@test isempty(sizehint!(empty!(x2),10))
|
||||||
|
end
|
||||||
|
|
||||||
|
import BibTeX: simplify_latex, markdown_directives
|
||||||
|
@testset "latex" begin
|
||||||
|
@test simplify_latex(raw"foo \$$x_1x_2^\mathrm{3}$ \dot{\alpha} {quote} \% \{unquote\} \emph{bar \textbf{bold}} {\bf baz 2.0} {\^{u}}", markdown_directives) ==
|
||||||
|
"foo \$x₁x₂³ α̇ quote % {unquote} _bar **bold**_ **baz 2.0** û"
|
||||||
|
end
|
Loading…
Reference in New Issue