Merge pull request #4 from stevengj/bibtype

construct Bib and BibItem types for better I/O
This commit is contained in:
bramtayl 2017-08-01 22:45:51 -04:00 committed by GitHub
commit 251f16ce9f
7 changed files with 291 additions and 175 deletions

View File

@ -55,12 +55,12 @@
indextitle = {Effect of immobilization on catalytic characteristics}, indextitle = {Effect of immobilization on catalytic characteristics},
} }
@article{angenendt, @Article{angenendt,
author = {Angenendt, Arnold}, author = {Angenendt, Arnold},
title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der
Patrozinienkunde}, Patrozinienkunde},
journaltitle = {Revue d'Histoire Eccl{\'e}siastique}, journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
date = 2002, Date = 2002,
volume = 97, volume = 97,
pages = {431--456, 791--823}, pages = {431--456, 791--823},
langid = {german}, langid = {german},

View File

@ -1,173 +1,8 @@
module BibTeX module BibTeX
export Bibliography, Citation
struct Parser{T} include("parser.jl")
tokens::T include("citation.jl")
substitutions::Dict{String, String} include("bibliography.jl")
records::Dict{String, Dict{String, String}}
line::Ref{Int}
end
# Token type of the parser: the element type of its token container.
function Base.eltype(p::Parser)
    return eltype(p.tokens)
end

# The empty token (`""` converted to the parser's token type).
# `next_token_default!` returns it once the token stream is exhausted.
function Base.one(p::Parser)
    TokenType = eltype(p)
    return TokenType("")
end

# Outer constructor that takes the type parameter from `tokens` and leaves the
# remaining arguments to be converted to the declared field types.
function Parser(tokens::T, substitutions, records, line) where T
    return Parser{T}(tokens, substitutions, records, line)
end
# Split `text` into tokens and wrap them in a fresh `Parser` positioned on line 1.
# A token is either one of the delimiters `"`, `#`, `{`, `}`, `@`, `,`, `=`, a newline
# (kept as its own token so that line numbers can be counted), or a run of any other
# non-whitespace characters.
function parse_text(text)
    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
    substitutions = Dict{String, String}()
    # Built with the value type the `records` field declares, instead of relying on
    # an implicit conversion of an empty `Dict{String, String}`.
    records = Dict{String, Dict{String, String}}()
    return Parser(tokens, substitutions, records, Ref(1))
end
# Human-readable position used as the tail of error messages.
location(parser) = "on line $(parser.line[])"

# Remove and return the next token, skipping newline tokens and bumping the line
# counter once for each newline skipped. Returns the empty token `one(parser)` when
# the input is exhausted.
#
# Written as a loop (rather than recursing once per newline) so that long runs of
# blank lines cannot grow the call stack, and uses `line[]` instead of reaching
# into the private field of the `Ref`.
function next_token_default!(parser)
    while !isempty(parser.tokens)
        token = shift!(parser.tokens)
        token == "\n" || return token
        parser.line[] += 1
    end
    return one(parser)
end
# Like `next_token_default!`, but running out of input is an error.
# `eol` describes what was expected and only appears in the error message.
function next_token!(parser, eol = "additional tokens")
    token = next_token_default!(parser)
    token == "" && error("Expected $eol $(location(parser))")
    return token
end

# Check an already-read token: returns `nothing` when `result` equals
# `expectation`, otherwise raises an error naming the expected token.
function expect(parser, result, expectation)
    result == expectation && return nothing
    error("Expected $expectation $(location(parser))")
end

# Read the next token and require it to be `expectation`.
function expect!(parser, expectation)
    token = next_token!(parser, expectation)
    return expect(parser, token, expectation)
end
# Read one token inside a braced value and return it together with the updated
# brace depth: `{` raises the depth by one, `}` lowers it by one, anything else
# leaves it unchanged. Running out of input is reported as a missing `}`.
function token_and_counter!(parser, bracket_counter = 1)
    token = next_token!(parser, "}")
    delta = token == "{" ? 1 : (token == "}" ? -1 : 0)
    return token, bracket_counter + delta
end
# Read one field value and return `(next_token, text)`.
#
# A value is one or more pieces joined by `#`. A piece is a `"`-quoted run of
# tokens, a `{...}` group (nested braces are kept, the outer pair is dropped), or a
# bare token. A bare token that names an `@string` substitution is replaced by its
# definition. All collected tokens are joined with single spaces.
#
# `next_token` is the token that follows the value; it has already been consumed.
# With `allow_eof = false` the end of input at that point is an error; with
# `allow_eof = true` it is reported as the empty token `""`.
function value!(parser, values = eltype(parser)[]; allow_eof = false)
    token = next_token!(parser)
    if token == "\""
        token = next_token!(parser, "\"")
        while token != "\""
            push!(values, token)
            token = next_token!(parser, "\"")
        end
    elseif token == "{"
        token, counter = token_and_counter!(parser)
        while counter > 0
            push!(values, token)
            token, counter = token_and_counter!(parser, counter)
        end
    else
        # `get`, not `getkey`: we want the substitution's definition, not its name.
        push!(values, get(parser.substitutions, token, String(token)))
    end
    token = allow_eof ? next_token_default!(parser) : next_token!(parser, ", or }")
    if token == "#"
        return value!(parser, values; allow_eof = allow_eof)
    else
        return token, join(values, " ")
    end
end

# Read comma-separated `key = value` pairs into `dict` up to and including the
# closing `}` of the record. A trailing comma before `}` is accepted.
function field!(parser, dict)
    token = ","
    while token == ","
        token = next_token!(parser, "a new entry or }")
        if token != "}"
            key = token
            expect!(parser, "=")
            token, value = value!(parser)
            dict[key] = value
        end
    end
    return expect(parser, token, "}")
end

export parse_bibtex

"""
    parse_bibtex(text)

This is a simple input parser for BibTex. I had trouble finding a standard
specification, but I've included several features of real BibTex. Returns
a preamble (or an empty string) and a dict of dicts.

Names defined with `@string` are substituted when they are used as bare values.
Each record's dict stores the record type under the key `"type"`.

```jldoctest
julia> using BibTeX

julia> preamble, result = parse_bibtex(""\"
           @preamble{some instructions}
           @comment blah blah
           @string{short = long}
           @a{b,
               c = { {c} c},
               d = "d d",
               e = f # short
           }
           ""\");

julia> preamble
"some instructions"

julia> result["b"]["type"]
"a"

julia> result["b"]["c"]
"{ c } c"

julia> result["b"]["d"]
"d d"

julia> result["b"]["e"]
"f long"

julia> parse_bibtex("@preamble{x} @a{b, c = d}")[2]["b"]["c"]
"d"

julia> parse_bibtex("@book")
ERROR: Expected { on line 1
[...]

julia> parse_bibtex("@book@")
ERROR: Expected { on line 1
[...]
```
"""
function parse_bibtex(text)
    parser = parse_text(text)
    token = next_token_default!(parser)
    preamble = ""
    while token != ""
        if token == "@"
            record_type = lowercase(next_token!(parser))
            if record_type == "preamble"
                # `value!` has already read the token that follows the preamble
                # (possibly the `@` of the next record, or nothing at all), so
                # handle that token in the next iteration instead of dropping it.
                token, preamble = value!(parser; allow_eof = true)
                continue
            elseif record_type != "comment"
                expect!(parser, "{")
                if record_type == "string"
                    field!(parser, parser.substitutions)
                else
                    id = next_token!(parser)
                    dict = Dict("type" => record_type)
                    expect!(parser, ",")
                    field!(parser, dict)
                    parser.records[id] = dict
                end
            end
        end
        token = next_token_default!(parser)
    end
    return preamble, parser.records
end
end end

39
src/bibliography.jl Normal file
View File

@ -0,0 +1,39 @@
struct Bibliography <: Associative{String,Citation}
    preamble::String                # text of the `@preamble` record, or ""
    data::Dict{String,Citation}     # citation key => bibliography item
end

"""
    Bibliography(bibtex::String)
    Bibliography(io::IO)

Given a string (or IO stream) of bibtex-format bibliography data,
parses the data and returns a `Dict`-like object `b::Bibliography` that
behaves as a dictionary mapping strings to bibliography items
[`Citation`](@ref).
"""
function Bibliography(bibtex::String)
    preamble, data = parse_bibtex(bibtex)
    # Fill an explicitly typed dictionary so the result type does not depend on
    # which entry types happen to be present (or on the input being empty).
    citations = Dict{String,Citation}()
    for (key, fields) in data
        # `Citation!` removes the "__type__" entry from `fields` and reuses the dict.
        citations[key] = Citation!(fields)
    end
    return Bibliography(preamble, citations)
end

Bibliography(io::IO) = Bibliography(readstring(io))
# `open(Bibliography, args...)`: open a stream with `args`, parse it, and close it.
function Base.open(::Type{Bibliography}, args...)
    return open(args...) do io
        Bibliography(io)
    end
end

# An empty bibliography (the preamble is not carried over).
function Base.similar(b::Bibliography)
    return Bibliography("", Dict{String,Citation}())
end

# The mutating helpers below forward to the wrapped `Dict` and return `b` itself.
function Base.rehash!(b::Bibliography, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Bibliography, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Bibliography)
    empty!(b.data)
    return b
end

# Copies the dictionary itself; the stored `Citation` objects are shared.
function Base.copy(b::Bibliography)
    return Bibliography(b.preamble, copy(b.data))
end

function Base.setindex!(b::Bibliography, v::Citation, k::AbstractString)
    key = String(k)
    b.data[key] = v
    return b
end

function Base.get(b::Bibliography, k::AbstractString, default)
    return get(b.data, String(k), default)
end

# Iteration and length are those of the wrapped dictionary.
function Base.start(b::Bibliography)
    return start(b.data)
end

function Base.done(b::Bibliography, i)
    return done(b.data, i)
end

function Base.next(b::Bibliography, i)
    return next(b.data, i)
end

function Base.length(b::Bibliography)
    return length(b.data)
end

# TODO: add specialized Base.show methods for MIME"text/bibtex" etc.

42
src/citation.jl Normal file
View File

@ -0,0 +1,42 @@
"""
Citation{S}(data::Dict{String,String})
A bibliography item in a bibTeX database, based on a dictionary of
strings to values. It is parameterized by a symbol `S` giving the
type of the item (`:article` etcetera). A `b::Citation` supports
`b[key]` access to retrieve the data and in general acts like
a dictionary from `String` to `String`.
"""
struct Citation{S} <: Associative{String,String}
    data::Dict{String,String}   # field name => field text
end

# An item of type `S` with no fields.
function Citation{S}() where {S}
    return Citation{S}(Dict{String,String}())
end

# Build a `Citation` from a parsed record. The record type is taken from the
# "__type__" entry, which is removed from `data`; `data` itself (not a copy)
# becomes the storage of the returned item.
function Citation!(data::Dict{String,String})
    kind = pop!(data, "__type__")
    return Citation{Symbol(kind)}(data)
end
# An empty item of the same type `S`.
function Base.similar(b::Citation{S}) where {S}
    return Citation{S}(Dict{String,String}())
end

# The mutating helpers below forward to the wrapped `Dict` and return `b` itself.
function Base.rehash!(b::Citation, n=length(b.data))
    Base.rehash!(b.data, n)
    return b
end

function Base.sizehint!(b::Citation, n)
    sizehint!(b.data, n)
    return b
end

function Base.empty!(b::Citation)
    empty!(b.data)
    return b
end

function Base.copy(b::Citation{S}) where {S}
    return Citation{S}(copy(b.data))
end

# Lookups accept any `AbstractString` key and convert it to `String` first.
function Base.get(b::Citation, k::AbstractString, default)
    return get(b.data, String(k), default)
end

function Base.getindex(b::Citation, k::AbstractString)
    return getindex(b.data, String(k))
end

function Base.setindex!(b::Citation, v::AbstractString, k::AbstractString)
    key = String(k)
    b.data[key] = String(v)
    return b
end

# Iteration and length are those of the wrapped dictionary.
function Base.start(b::Citation)
    return start(b.data)
end

function Base.done(b::Citation, i)
    return done(b.data, i)
end

function Base.next(b::Citation, i)
    return next(b.data, i)
end

function Base.length(b::Citation)
    return length(b.data)
end
# Compact one-line summary: the item type and the number of fields it holds.
# Declared with `where {S}` like the other parametric methods in this file; the
# `f{S}(...)` method form is deprecated.
function Base.show(io::IO, b::Citation{S}) where {S}
    print(io, "Citation{:$S}(", length(b), " entries)")
end
# TODO: add Base.show text/plain and text/markdown for formatted citation

168
src/parser.jl Normal file
View File

@ -0,0 +1,168 @@
# State of one parsing run.
struct Parser{T}
    tokens::T                                       # remaining tokens, consumed from the front
    substitutions::Dict{String, String}             # `@string` definitions: name => text
    records::Dict{String, Dict{String, String}}     # parsed entries: citation key => fields
    # Current line number. Concretely typed (`Ref{Int}` is abstract) so that
    # reads and updates of the counter are statically typed.
    line::Base.RefValue{Int}
end

# Token type of the parser: the element type of its token container.
Base.eltype(p::Parser) = eltype(p.tokens)

# The empty token (`""` converted to the parser's token type).
# `next_token_default!` returns it once the token stream is exhausted.
Base.one(p::Parser) = eltype(p)("")

# Outer constructor that takes the type parameter from `tokens` and leaves the
# remaining arguments to be converted to the declared field types.
Parser(tokens::T, substitutions, records, line) where T =
    Parser{T}(tokens, substitutions, records, line)
# Split `text` into tokens and wrap them in a fresh `Parser` positioned on line 1.
# A token is either one of the delimiters `"`, `#`, `{`, `}`, `@`, `,`, `=`, a newline
# (kept as its own token so that line numbers can be counted), or a run of any other
# non-whitespace characters.
function parse_text(text)
    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
    substitutions = Dict{String, String}()
    # Built with the value type the `records` field declares, instead of relying on
    # an implicit conversion of an empty `Dict{String, String}`.
    records = Dict{String, Dict{String, String}}()
    return Parser(tokens, substitutions, records, Ref(1))
end
# Human-readable position used as the tail of error messages.
location(parser) = "on line $(parser.line[])"

# Remove and return the next token, skipping newline tokens and bumping the line
# counter once for each newline skipped. Returns the empty token `one(parser)` when
# the input is exhausted.
#
# Written as a loop (rather than recursing once per newline) so that long runs of
# blank lines cannot grow the call stack, and uses `line[]` instead of reaching
# into the private field of the `Ref`.
function next_token_default!(parser)
    while !isempty(parser.tokens)
        token = shift!(parser.tokens)
        token == "\n" || return token
        parser.line[] += 1
    end
    return one(parser)
end
# Like `next_token_default!`, but running out of input is an error.
# `eol` describes what was expected and only appears in the error message.
function next_token!(parser, eol = "additional tokens")
    token = next_token_default!(parser)
    token == "" && error("Expected $eol $(location(parser))")
    return token
end

# Check an already-read token: returns `nothing` when `result` equals
# `expectation`, otherwise raises an error naming the expected token.
function expect(parser, result, expectation)
    result == expectation && return nothing
    error("Expected $expectation $(location(parser))")
end

# Read the next token and require it to be `expectation`.
function expect!(parser, expectation)
    token = next_token!(parser, expectation)
    return expect(parser, token, expectation)
end
# Read one token inside a braced value and return it together with the updated
# brace depth: `{` raises the depth by one, `}` lowers it by one, anything else
# leaves it unchanged. Running out of input is reported as a missing `}`.
function token_and_counter!(parser, bracket_counter = 1)
    token = next_token!(parser, "}")
    delta = token == "{" ? 1 : (token == "}" ? -1 : 0)
    return token, bracket_counter + delta
end
# Read one field value and return `(next_token, text)`.
#
# A value is one or more pieces joined by `#`. A piece is a `"`-quoted run of
# tokens, a `{...}` group (nested braces are kept, the outer pair is dropped), or a
# bare token. A bare token that names an `@string` substitution is replaced by its
# definition. All collected tokens are joined with single spaces.
#
# `next_token` is the token that follows the value; it has already been consumed.
# With `allow_eof = false` the end of input at that point is an error; with
# `allow_eof = true` it is reported as the empty token `""`.
function value!(parser, values = eltype(parser)[]; allow_eof = false)
    token = next_token!(parser)
    if token == "\""
        token = next_token!(parser, "\"")
        while token != "\""
            push!(values, token)
            token = next_token!(parser, "\"")
        end
    elseif token == "{"
        token, counter = token_and_counter!(parser)
        while counter > 0
            push!(values, token)
            token, counter = token_and_counter!(parser, counter)
        end
    else
        # `get`, not `getkey`: we want the substitution's definition, not its name.
        # The lookup is lowercased because `field!` stores names lowercased.
        push!(values, get(parser.substitutions, lowercase(token), String(token)))
    end
    token = allow_eof ? next_token_default!(parser) : next_token!(parser, ", or }")
    if token == "#"
        return value!(parser, values; allow_eof = allow_eof)
    else
        return token, join(values, " ")
    end
end

# Read comma-separated `key = value` pairs into `dict` up to and including the
# closing `}` of the record. Keys are stored lowercased. A trailing comma before
# `}` is accepted.
function field!(parser, dict)
    token = ","
    while token == ","
        token = next_token!(parser, "a new entry or }")
        if token != "}"
            key = token
            expect!(parser, "=")
            token, value = value!(parser)
            dict[lowercase(key)] = value
        end
    end
    return expect(parser, token, "}")
end

"""
    parse_bibtex(text)

This is a simple input parser for BibTex. I had trouble finding a standard
specification, but I've included several features of real BibTex. Returns
a preamble (or an empty string) and a dict of dicts.

Field names are stored lowercased. Names defined with `@string` are substituted
when they are used as bare values. Each record's dict stores the record type
under the key `"__type__"`.

```jldoctest
julia> using BibTeX: parse_bibtex

julia> preamble, result = parse_bibtex(""\"
           @preamble{some instructions}
           @comment blah blah
           @string{short = long}
           @a{b,
               c = { {c} c},
               d = "d d",
               e = f # short
           }
           ""\");

julia> preamble
"some instructions"

julia> result["b"]["__type__"]
"a"

julia> result["b"]["c"]
"{ c } c"

julia> result["b"]["d"]
"d d"

julia> result["b"]["e"]
"f long"

julia> parse_bibtex("@preamble{x} @a{b, c = d}")[2]["b"]["c"]
"d"

julia> parse_bibtex("@book")
ERROR: Expected { on line 1
[...]

julia> parse_bibtex("@book@")
ERROR: Expected { on line 1
[...]
```
"""
function parse_bibtex(text)
    parser = parse_text(text)
    token = next_token_default!(parser)
    preamble = ""
    while token != ""
        if token == "@"
            record_type = lowercase(next_token!(parser))
            if record_type == "preamble"
                # `value!` has already read the token that follows the preamble
                # (possibly the `@` of the next record, or nothing at all), so
                # handle that token in the next iteration instead of dropping it.
                token, preamble = value!(parser; allow_eof = true)
                continue
            elseif record_type != "comment"
                expect!(parser, "{")
                if record_type == "string"
                    field!(parser, parser.substitutions)
                else
                    id = next_token!(parser)
                    dict = Dict("__type__" => record_type)
                    expect!(parser, ",")
                    field!(parser, dict)
                    parser.records[id] = dict
                end
            end
        end
        token = next_token_default!(parser)
    end
    return preamble, parser.records
end

View File

@ -3,4 +3,4 @@ const file = joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bi
using BenchmarkTools using BenchmarkTools
using BibTeX using BibTeX
@benchmark parse_bibtex(file) @benchmark BibTeX.parse_bibtex(file)

View File

@ -1,4 +1,4 @@
using BibTeX using BibTeX, Base.Test
import Documenter import Documenter
Documenter.makedocs( Documenter.makedocs(
@ -13,5 +13,37 @@ Documenter.makedocs(
authors = "Brandon Taylor" authors = "Brandon Taylor"
) )
# just test if it parses (for now) @testset "examples.bib" begin
joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bib") |> readstring |> parse_bibtex b = open(Bibliography, joinpath("..", "example", "examples.bib"), "r")
@test length(b) == 92
@test (b["angenendt"]::Citation{:article})["date"] == "2002"
end
# Exercises the dictionary-style interface of `Bibliography` and `Citation` on a
# two-entry database given inline.
@testset "small bib" begin
b = Bibliography("""
@article{foo, bar=baz}
@book{bar, foobar=1}
""")
# "foobar" is a field name inside entry "bar", not a citation key
@test get(b, "foobar", nothing) === nothing
@test get(b["foo"], "blah", nothing) === nothing
@test string(b["foo"]) == "Citation{:article}(1 entries)"
# only checks that the call is accepted
Base.rehash!(b)
b2 = copy(b)
@test length(b2) == length(b)
# `empty!` and `sizehint!` return the collection, so the calls can be chained
@test isempty(sizehint!(empty!(b2),10))
@test isempty(similar(b))
# build an entry by hand through `setindex!` on both levels
b2["x"] = Citation{:foo}()
b2["x"]["bar"] = "blah"
@test length(b2) == length(b2["x"]) == 1
@test b2["x"]["bar"] == "blah"
@test get(b2["x"], "foo", nothing) === nothing
# iteration yields key => value pairs on both levels
@test collect(b2)[1][2] == b2["x"]
@test collect(b2["x"])[1] == ("bar"=>"blah")
Base.rehash!(b2["x"])
# `copy` keeps the type parameter of the citation
x2 = copy(b2["x"])::Citation{:foo}
@test length(x2) == 1
@test isempty(similar(x2))
@test isempty(sizehint!(empty!(x2),10))
end