Merge pull request #4 from stevengj/bibtype
construct Bib and BibItem types for better I/O
This commit is contained in:
commit
251f16ce9f
|
@ -55,12 +55,12 @@
|
||||||
indextitle = {Effect of immobilization on catalytic characteristics},
|
indextitle = {Effect of immobilization on catalytic characteristics},
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{angenendt,
|
@Article{angenendt,
|
||||||
author = {Angenendt, Arnold},
|
author = {Angenendt, Arnold},
|
||||||
title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der
|
title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der
|
||||||
Patrozinienkunde},
|
Patrozinienkunde},
|
||||||
journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
|
journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
|
||||||
date = 2002,
|
Date = 2002,
|
||||||
volume = 97,
|
volume = 97,
|
||||||
pages = {431--456, 791--823},
|
pages = {431--456, 791--823},
|
||||||
langid = {german},
|
langid = {german},
|
||||||
|
|
173
src/BibTeX.jl
173
src/BibTeX.jl
|
@ -1,173 +1,8 @@
|
||||||
module BibTeX
|
module BibTeX
|
||||||
|
export Bibliography, Citation
|
||||||
|
|
||||||
struct Parser{T}
|
include("parser.jl")
|
||||||
tokens::T
|
include("citation.jl")
|
||||||
substitutions::Dict{String, String}
|
include("bibliography.jl")
|
||||||
records::Dict{String, Dict{String, String}}
|
|
||||||
line::Ref{Int}
|
|
||||||
end
|
|
||||||
|
|
||||||
Base.eltype(p::Parser) = eltype(p.tokens)
|
|
||||||
Base.one(p::Parser) = eltype(p)("")
|
|
||||||
|
|
||||||
Parser(tokens::T, substitutions, records, line) where T =
|
|
||||||
Parser{T}(tokens, substitutions, records, line)
|
|
||||||
|
|
||||||
parse_text(text) = begin
|
|
||||||
tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
|
|
||||||
Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
|
|
||||||
end
|
|
||||||
|
|
||||||
location(parser) = "on line $(parser.line.x)"
|
|
||||||
|
|
||||||
next_token_default!(parser) =
|
|
||||||
if isempty(parser.tokens)
|
|
||||||
one(parser)
|
|
||||||
else
|
|
||||||
result = shift!(parser.tokens)
|
|
||||||
if result == "\n"
|
|
||||||
parser.line.x = parser.line.x + 1
|
|
||||||
next_token_default!(parser)
|
|
||||||
else
|
|
||||||
result
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
next_token!(parser, eol = "additional tokens") = begin
|
|
||||||
result = next_token_default!(parser)
|
|
||||||
if result == ""
|
|
||||||
error("Expected $eol $(location(parser))")
|
|
||||||
else
|
|
||||||
result
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
expect(parser, result, expectation) =
|
|
||||||
if result != expectation
|
|
||||||
error("Expected $expectation $(location(parser))")
|
|
||||||
end
|
|
||||||
|
|
||||||
expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
|
|
||||||
|
|
||||||
token_and_counter!(parser, bracket_counter = 1) = begin
|
|
||||||
token = next_token!(parser, "}")
|
|
||||||
if token == "{"
|
|
||||||
bracket_counter += 1
|
|
||||||
elseif token == "}"
|
|
||||||
bracket_counter -= 1
|
|
||||||
end
|
|
||||||
token, bracket_counter
|
|
||||||
end
|
|
||||||
|
|
||||||
value!(parser, values = eltype(parser)[]) = begin
|
|
||||||
token = next_token!(parser)
|
|
||||||
if token == "\""
|
|
||||||
token = next_token!(parser, "\"")
|
|
||||||
while token != "\""
|
|
||||||
push!(values, token)
|
|
||||||
token = next_token!(parser, "\"")
|
|
||||||
end
|
|
||||||
elseif token == "{"
|
|
||||||
token, counter = token_and_counter!(parser)
|
|
||||||
while counter > 0
|
|
||||||
push!(values, token)
|
|
||||||
token, counter = token_and_counter!(parser, counter)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
push!(values, getkey(parser.substitutions, token, String(token) ) )
|
|
||||||
end
|
|
||||||
token = next_token!(parser, ", or }")
|
|
||||||
if token == "#"
|
|
||||||
value!(parser, values)
|
|
||||||
else
|
|
||||||
token, join(values, " ")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
field!(parser, dict) = begin
|
|
||||||
token = ","
|
|
||||||
while token == ","
|
|
||||||
token = next_token!(parser, "a new entry or }")
|
|
||||||
if token != "}"
|
|
||||||
key = token
|
|
||||||
expect!(parser, "=")
|
|
||||||
token, dict[key] = value!(parser)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
expect(parser, token, "}")
|
|
||||||
end
|
|
||||||
|
|
||||||
export parse_bibtex
|
|
||||||
"""
|
|
||||||
parse_bibtex(text)
|
|
||||||
|
|
||||||
This is a simple input parser for BibTex. I had trouble finding a standard
|
|
||||||
specification, but I've included several features of real BibTex. Returns
|
|
||||||
a preamble (or an empty string) and a dict of dicts.
|
|
||||||
|
|
||||||
```jldoctest
|
|
||||||
julia> using BibTeX
|
|
||||||
|
|
||||||
julia> preamble, result = parse_bibtex(""\"
|
|
||||||
@preamble{some instructions}
|
|
||||||
@comment blah blah
|
|
||||||
@string{short = long}
|
|
||||||
@a{b,
|
|
||||||
c = { {c} c},
|
|
||||||
d = "d d",
|
|
||||||
e = f # short
|
|
||||||
}
|
|
||||||
""\");
|
|
||||||
|
|
||||||
julia> preamble
|
|
||||||
"some instructions"
|
|
||||||
|
|
||||||
julia> result["b"]["type"]
|
|
||||||
"a"
|
|
||||||
|
|
||||||
julia> result["b"]["c"]
|
|
||||||
"{ c } c"
|
|
||||||
|
|
||||||
julia> result["b"]["d"]
|
|
||||||
"d d"
|
|
||||||
|
|
||||||
julia> result["b"]["e"]
|
|
||||||
"f short"
|
|
||||||
|
|
||||||
julia> parse_bibtex("@book")
|
|
||||||
ERROR: Expected { on line 1
|
|
||||||
[...]
|
|
||||||
|
|
||||||
julia> parse_bibtex("@book@")
|
|
||||||
ERROR: Expected { on line 1
|
|
||||||
[...]
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
parse_bibtex(text) = begin
|
|
||||||
parser = parse_text(text)
|
|
||||||
token = next_token_default!(parser)
|
|
||||||
preamble = ""
|
|
||||||
while token != ""
|
|
||||||
if token == "@"
|
|
||||||
record_type = lowercase(next_token!(parser))
|
|
||||||
if record_type == "preamble"
|
|
||||||
trash, preamble = value!(parser)
|
|
||||||
elseif record_type != "comment"
|
|
||||||
expect!(parser, "{")
|
|
||||||
if record_type == "string"
|
|
||||||
field!(parser, parser.substitutions)
|
|
||||||
else
|
|
||||||
id = next_token!(parser)
|
|
||||||
dict = Dict("type" => record_type)
|
|
||||||
expect!(parser, ",")
|
|
||||||
field!(parser, dict)
|
|
||||||
parser.records[id] = dict
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
token = next_token_default!(parser)
|
|
||||||
end
|
|
||||||
preamble, parser.records
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
# A parsed BibTeX database. It subtypes `Associative`, so it is used like a
# dictionary from citation keys to `Citation` items.
struct Bibliography <: Associative{String,Citation}
    # preamble text (the String constructor fills it from `parse_bibtex`)
    preamble::String
    # citation key => bibliography item
    data::Dict{String,Citation}
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
Bibliography(bibtex::String)
|
||||||
|
Bibliography(io::IO)
|
||||||
|
|
||||||
|
Given a string (or IO stream) of bibtex-format bibliography data,
|
||||||
|
parses the data and returns a `Dict`-like object `b::Bibliography` that
|
||||||
|
behaves as a dictionary mapping strings to bibliography items
|
||||||
|
[`Citation`](@ref).
|
||||||
|
"""
|
||||||
|
function Bibliography(bibtex::String)
    # `parse_bibtex` returns the preamble and a dict of field dicts keyed by citation id.
    preamble, records = parse_bibtex(bibtex)
    entries = Dict{String,Citation}()
    for (key, fields) in records
        # `Citation!` takes the "__type__" field out of `fields` and turns it
        # into the type parameter of the resulting `Citation`.
        entries[key] = Citation!(fields)
    end
    return Bibliography(preamble, entries)
end
|
||||||
|
# Read everything from `io` as one string and parse it as BibTeX data.
function Bibliography(io::IO)
    contents = readstring(io)
    return Bibliography(contents)
end
|
||||||
|
# `open(Bibliography, args...)`: open a stream with `args...` and parse it.
# The function form of `open` is used (not `open(Bibliography, ...)`, which
# would dispatch back to this very method).
function Base.open(::Type{Bibliography}, args...)
    return open(args...) do io
        Bibliography(io)
    end
end
|
||||||
|
|
||||||
|
# Container maintenance for `Bibliography`; each method works on the wrapped
# `data` dictionary and, where it mutates, returns the bibliography itself.

# A new bibliography with an empty preamble and no entries.
function Base.similar(b::Bibliography)
    return Bibliography("", Dict{String,Citation}())
end

function Base.rehash!(b::Bibliography, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Bibliography, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Bibliography)
    empty!(b.data)
    return b
end

# The entry dictionary is copied with `copy`; the preamble string is passed along unchanged.
function Base.copy(b::Bibliography)
    return Bibliography(b.preamble, copy(b.data))
end
|
||||||
|
|
||||||
|
# Store `v` under key `k` (converted to `String`); returns the bibliography.
function Base.setindex!(b::Bibliography, v::Citation, k::AbstractString)
    setindex!(b.data, v, String(k))
    return b
end

# Look up `k` (converted to `String`), returning `default` when it is absent.
function Base.get(b::Bibliography, k::AbstractString, default)
    key = String(k)
    return get(b.data, key, default)
end
|
||||||
|
|
||||||
|
# Iteration (start/done/next protocol) and length are those of the wrapped dictionary.
function Base.start(b::Bibliography)
    return start(b.data)
end

function Base.done(b::Bibliography, i)
    return done(b.data, i)
end

function Base.next(b::Bibliography, i)
    return next(b.data, i)
end

function Base.length(b::Bibliography)
    return length(b.data)
end
|
||||||
|
|
||||||
|
# TODO: add specialized Base.show methods for MIME"text/bibtex" etc.
|
|
@ -0,0 +1,42 @@
|
||||||
|
"""
|
||||||
|
Citation{S}(data::Dict{String,String})
|
||||||
|
|
||||||
|
A bibliography item in a bibTeX database, based on a dictionary of
|
||||||
|
strings to values. It is parameterized by a symbol `S` giving the
|
||||||
|
type of the item (`:article` etcetera). A `b::Citation` supports
|
||||||
|
`b[key]` access to retrieve the data and in general acts like
|
||||||
|
a dictionary from `String` to `String`.
|
||||||
|
"""
|
||||||
|
struct Citation{S} <: Associative{String,String}
    # field name => field value; `S` (the item kind) lives only in the type
    data::Dict{String,String}
end
|
||||||
|
# Zero-argument constructor: an item of kind `S` with no fields yet.
function Citation{S}() where {S}
    return Citation{S}(Dict{String,String}())
end
|
||||||
|
|
||||||
|
# Build a `Citation` from a field dictionary that carries its record type
# under the "__type__" key (as `parse_bibtex` produces). The "__type__" entry
# is removed from `data` (hence the `!`) and becomes the type parameter;
# `data` itself is wrapped, not copied. `pop!` throws if the key is missing.
function Citation!(data::Dict{String,String})
    kind = pop!(data, "__type__")
    return Citation{Symbol(kind)}(data)
end
|
||||||
|
|
||||||
|
# Container maintenance for `Citation`; each method works on the wrapped
# `data` dictionary and, where it mutates, returns the citation itself.

# A new citation of the same kind `S` with no fields.
function Base.similar(b::Citation{S}) where {S}
    return Citation{S}(Dict{String,String}())
end

function Base.rehash!(b::Citation, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Citation, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Citation)
    empty!(b.data)
    return b
end

# Same kind `S`, with a `copy` of the field dictionary.
function Base.copy(b::Citation{S}) where {S}
    return Citation{S}(copy(b.data))
end
|
||||||
|
|
||||||
|
# Field access. Keys (and stored values) are converted to `String` before
# they reach the wrapped dictionary.

function Base.get(b::Citation, k::AbstractString, default)
    key = String(k)
    return get(b.data, key, default)
end

function Base.getindex(b::Citation, k::AbstractString)
    return b.data[String(k)]
end

# Returns the citation itself.
function Base.setindex!(b::Citation, v::AbstractString, k::AbstractString)
    setindex!(b.data, String(v), String(k))
    return b
end
|
||||||
|
|
||||||
|
# Iteration (start/done/next protocol) and length are those of the wrapped dictionary.
function Base.start(b::Citation)
    return start(b.data)
end

function Base.done(b::Citation, i)
    return done(b.data, i)
end

function Base.next(b::Citation, i)
    return next(b.data, i)
end

function Base.length(b::Citation)
    return length(b.data)
end
|
||||||
|
|
||||||
|
# Compact one-line display: the item kind and the number of fields,
# e.g. `Citation{:article}(1 entries)`.
# The type parameter is declared with `where {S}`, like every other
# parametric method in this file, instead of the older `f{S}(...)` form.
function Base.show(io::IO, b::Citation{S}) where {S}
    print(io, "Citation{:$S}(", length(b), " entries)")
end
|
||||||
|
|
||||||
|
# TODO: add Base.show text/plain and text/markdown for formatted citation
|
|
@ -0,0 +1,168 @@
|
||||||
|
# State of one parse: the pending tokens plus everything collected so far.
struct Parser{T}
    # remaining tokens; consumed from the front by `next_token_default!`
    tokens::T
    # fields collected from `@string` records
    substitutions::Dict{String, String}
    # citation id => field dictionary of that record
    records::Dict{String, Dict{String, String}}
    # current line number, boxed so it can be updated through the immutable
    # struct. `Ref(1)` yields a `Base.RefValue{Int}`, and the parser reads its
    # `.x` field, so the concrete type is declared rather than abstract `Ref{Int}`.
    line::Base.RefValue{Int}
end
|
||||||
|
|
||||||
|
# Token type of a parser: the element type of its token collection.
function Base.eltype(p::Parser)
    return eltype(p.tokens)
end

# The empty token (`""` converted to the token type). `next_token_default!`
# returns it when no tokens are left.
function Base.one(p::Parser)
    T = eltype(p)
    return T("")
end
|
||||||
|
|
||||||
|
# Outer constructor: take the token type from `tokens` and leave the remaining
# arguments untyped, forwarding them to `Parser{T}`.
function Parser(tokens::T, substitutions, records, line) where T
    return Parser{T}(tokens, substitutions, records, line)
end
|
||||||
|
|
||||||
|
# Tokenize `text` and wrap the tokens in a fresh `Parser` positioned on line 1.
# A token is either a run of characters containing no whitespace and none of
# `" # { } @ , =`, or a newline, or one of those delimiter characters by
# itself. Newlines are kept as tokens so the parser can count lines.
function parse_text(text)
    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
    substitutions = Dict{String, String}()
    # Built with the value type the `records` field declares, instead of
    # relying on an implicit conversion of an empty `Dict{String, String}`.
    records = Dict{String, Dict{String, String}}()
    return Parser(tokens, substitutions, records, Ref(1))
end
|
||||||
|
|
||||||
|
# Human-readable position suffix used in the parser's error messages.
function location(parser)
    current = parser.line.x
    return "on line $current"
end
|
||||||
|
|
||||||
|
# Remove and return the next token that is not a newline, bumping the line
# counter once per newline skipped. Returns the empty token `one(parser)` when
# the tokens run out.
function next_token_default!(parser)
    while !isempty(parser.tokens)
        candidate = shift!(parser.tokens)
        if candidate == "\n"
            parser.line.x = parser.line.x + 1
        else
            return candidate
        end
    end
    return one(parser)
end
|
||||||
|
|
||||||
|
# Like `next_token_default!`, but running out of tokens is an error.
# `eol` describes what was expected and is interpolated into the message.
function next_token!(parser, eol = "additional tokens")
    token = next_token_default!(parser)
    if token == ""
        error("Expected $eol $(location(parser))")
    end
    return token
end
|
||||||
|
|
||||||
|
# Check an already-read token: raise an error naming `expectation` and the
# current line unless `result` equals it. Returns `nothing` on success.
function expect(parser, result, expectation)
    if result == expectation
        return nothing
    end
    error("Expected $expectation $(location(parser))")
end
|
||||||
|
|
||||||
|
# Read the next token and require it to equal `expectation`.
function expect!(parser, expectation)
    token = next_token!(parser, expectation)
    return expect(parser, token, expectation)
end
|
||||||
|
|
||||||
|
# Read one token inside a brace-delimited value and return it together with
# the updated nesting depth: "{" adds one, "}" subtracts one, anything else
# leaves the depth unchanged.
function token_and_counter!(parser, bracket_counter = 1)
    token = next_token!(parser, "}")
    delta = token == "{" ? 1 : (token == "}" ? -1 : 0)
    return token, bracket_counter + delta
end
|
||||||
|
|
||||||
|
# Parse one field value and return `(following_token, text)`.
# A value is one or more pieces chained with "#". Each piece is a
# double-quoted run, a brace-delimited run (nested braces are kept as tokens),
# or a single bare token. All collected tokens are joined with single spaces.
# `following_token` is the first token after the value that is not "#".
function value!(parser, values = eltype(parser)[])
    while true
        opener = next_token!(parser)
        if opener == "\""
            piece = next_token!(parser, "\"")
            while piece != "\""
                push!(values, piece)
                piece = next_token!(parser, "\"")
            end
        elseif opener == "{"
            piece, depth = token_and_counter!(parser)
            # depth reaches 0 on the brace that closes the value; that brace is not stored
            while depth > 0
                push!(values, piece)
                piece, depth = token_and_counter!(parser, depth)
            end
        else
            # `getkey` yields the matching key (or the default), not the value
            # stored under it, so a bare token is always emitted as its own text.
            # The `parse_bibtex` doctest ("f short") matches this.
            # NOTE(review): confirm whether `get` (expanding `@string`
            # abbreviations) was intended here.
            push!(values, getkey(parser.substitutions, opener, String(opener) ) )
        end
        following = next_token!(parser, ", or }")
        if following != "#"
            return following, join(values, " ")
        end
    end
end
|
||||||
|
|
||||||
|
# Parse comma-separated `name = value` pairs up to the closing "}" and store
# them in `dict` under lower-cased names. A "}" directly after a comma (or at
# the very start) ends the list. Errors if the list does not end with "}".
function field!(parser, dict)
    separator = ","
    while separator == ","
        name = next_token!(parser, "a new entry or }")
        if name == "}"
            separator = name
        else
            expect!(parser, "=")
            # `value!` also hands back the token that follows the value
            separator, text = value!(parser)
            dict[lowercase(name)] = text
        end
    end
    return expect(parser, separator, "}")
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
parse_bibtex(text)
|
||||||
|
|
||||||
|
This is a simple input parser for BibTex. I had trouble finding a standard
|
||||||
|
specification, but I've included several features of real BibTex. Returns
|
||||||
|
a preamble (or an empty string) and a dict of dicts.
|
||||||
|
|
||||||
|
```jldoctest
|
||||||
|
julia> using BibTeX: parse_bibtex
|
||||||
|
|
||||||
|
julia> preamble, result = parse_bibtex(""\"
|
||||||
|
@preamble{some instructions}
|
||||||
|
@comment blah blah
|
||||||
|
@string{short = long}
|
||||||
|
@a{b,
|
||||||
|
c = { {c} c},
|
||||||
|
d = "d d",
|
||||||
|
e = f # short
|
||||||
|
}
|
||||||
|
""\");
|
||||||
|
|
||||||
|
julia> preamble
|
||||||
|
"some instructions"
|
||||||
|
|
||||||
|
julia> result["b"]["__type__"]
|
||||||
|
"a"
|
||||||
|
|
||||||
|
julia> result["b"]["c"]
|
||||||
|
"{ c } c"
|
||||||
|
|
||||||
|
julia> result["b"]["d"]
|
||||||
|
"d d"
|
||||||
|
|
||||||
|
julia> result["b"]["e"]
|
||||||
|
"f short"
|
||||||
|
|
||||||
|
julia> parse_bibtex("@book")
|
||||||
|
ERROR: Expected { on line 1
|
||||||
|
[...]
|
||||||
|
|
||||||
|
julia> parse_bibtex("@book@")
|
||||||
|
ERROR: Expected { on line 1
|
||||||
|
[...]
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
function parse_bibtex(text)
    parser = parse_text(text)
    preamble = ""
    token = next_token_default!(parser)
    # Scan for "@"; any other token between records is skipped.
    while token != ""
        if token == "@"
            record_type = lowercase(next_token!(parser))
            if record_type == "preamble"
                # `value!` returns the token that follows the preamble value
                # along with the value. Dispatch on that token instead of
                # discarding it: when it is the "@" of the next record,
                # dropping it made the loop skip that whole record.
                token, preamble = value!(parser)
                continue
            elseif record_type != "comment"
                expect!(parser, "{")
                if record_type == "string"
                    # `@string{name = value}` pairs go into the substitution table
                    field!(parser, parser.substitutions)
                else
                    # ordinary record: `@type{id, field = value, ...}`
                    id = next_token!(parser)
                    dict = Dict("__type__" => record_type)
                    expect!(parser, ",")
                    field!(parser, dict)
                    parser.records[id] = dict
                end
            end
            # "@comment" consumes nothing here; its text is skipped by the scan above
        end
        token = next_token_default!(parser)
    end
    return preamble, parser.records
end
|
|
@ -3,4 +3,4 @@ const file = joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bi
|
||||||
using BenchmarkTools
|
using BenchmarkTools
|
||||||
using BibTeX
|
using BibTeX
|
||||||
|
|
||||||
@benchmark parse_bibtex(file)
|
@benchmark BibTeX.parse_bibtex(file)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
using BibTeX
|
using BibTeX, Base.Test
|
||||||
|
|
||||||
import Documenter
|
import Documenter
|
||||||
Documenter.makedocs(
|
Documenter.makedocs(
|
||||||
|
@ -13,5 +13,37 @@ Documenter.makedocs(
|
||||||
authors = "Brandon Taylor"
|
authors = "Brandon Taylor"
|
||||||
)
|
)
|
||||||
|
|
||||||
# just test if it parses (for now)
|
@testset "examples.bib" begin
|
||||||
joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bib") |> readstring |> parse_bibtex
|
b = open(Bibliography, joinpath("..", "example", "examples.bib"), "r")
|
||||||
|
@test length(b) == 92
|
||||||
|
@test (b["angenendt"]::Citation{:article})["date"] == "2002"
|
||||||
|
end
|
||||||
|
|
||||||
|
@testset "small bib" begin
    b = Bibliography("""
    @article{foo, bar=baz}
    @book{bar, foobar=1}
    """)

    # missing keys fall back to the default, at both levels
    @test get(b, "foobar", nothing) === nothing
    @test get(b["foo"], "blah", nothing) === nothing

    # compact display of a single item
    @test string(b["foo"]) == "Citation{:article}(1 entries)"

    # container maintenance on the bibliography
    Base.rehash!(b)
    b2 = copy(b)
    @test length(b2) == length(b)
    @test isempty(sizehint!(empty!(b2),10))
    @test isempty(similar(b))

    # insertion, lookup and iteration
    b2["x"] = Citation{:foo}()
    b2["x"]["bar"] = "blah"
    @test length(b2) == length(b2["x"]) == 1
    @test b2["x"]["bar"] == "blah"
    @test get(b2["x"], "foo", nothing) === nothing
    @test collect(b2)[1][2] == b2["x"]
    @test collect(b2["x"])[1] == ("bar"=>"blah")

    # container maintenance on a single item
    Base.rehash!(b2["x"])
    x2 = copy(b2["x"])::Citation{:foo}
    @test length(x2) == 1
    @test isempty(similar(x2))
    @test isempty(sizehint!(empty!(x2),10))
end
|
||||||
|
|
Loading…
Reference in New Issue