Merge pull request #4 from stevengj/bibtype

construct Bib and BibItem types for better I/O
2017-08-01 22:45:51 -04:00 · 2017-08-01 22:45:51 -04:00 · 251f16ce9f
parent b16776190b e6c0702811
commit 251f16ce9f
7 changed files with 291 additions and 175 deletions
--- a/example/examples.bib
+++ b/example/examples.bib
@ -55,12 +55,12 @@
  indextitle   = {Effect of immobilization on catalytic characteristics},
 }

-@article{angenendt,
+@Article{angenendt,
  author       = {Angenendt, Arnold},
  title        = {In Honore Salvatoris~-- Vom Sinn und Unsinn der
                  Patrozinienkunde},
  journaltitle = {Revue d'Histoire Eccl{\'e}siastique},
-  date         = 2002,
+  Date         = 2002,
  volume       = 97,
  pages        = {431--456, 791--823},
  langid       = {german},
--- a/src/BibTeX.jl
+++ b/src/BibTeX.jl
@ -1,173 +1,8 @@
 module BibTeX
+export Bibliography, Citation

-struct Parser{T}
-    tokens::T
-    substitutions::Dict{String, String}
-    records::Dict{String, Dict{String, String}}
-    line::Ref{Int}
-end
-
-Base.eltype(p::Parser) = eltype(p.tokens)
-Base.one(p::Parser) = eltype(p)("")
-
-Parser(tokens::T, substitutions, records, line) where T =
-    Parser{T}(tokens, substitutions, records, line)
-
-parse_text(text) = begin
-    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
-    Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
-end
-
-location(parser) = "on line $(parser.line.x)"
-
-next_token_default!(parser) =
-    if isempty(parser.tokens)
-        one(parser)
-    else
-        result = shift!(parser.tokens)
-        if result == "\n"
-            parser.line.x = parser.line.x + 1
-            next_token_default!(parser)
-        else
-            result
-        end
-    end
-
-next_token!(parser, eol = "additional tokens") = begin
-    result = next_token_default!(parser)
-    if result == ""
-        error("Expected $eol $(location(parser))")
-    else
-        result
-    end
-end
-
-expect(parser, result, expectation) =
-    if result != expectation
-        error("Expected $expectation $(location(parser))")
-    end
-
-expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
-
-token_and_counter!(parser, bracket_counter = 1) = begin
-    token = next_token!(parser, "}")
-    if token == "{"
-        bracket_counter += 1
-    elseif token == "}"
-        bracket_counter -= 1
-    end
-    token, bracket_counter
-end
-
-value!(parser, values = eltype(parser)[]) = begin
-    token = next_token!(parser)
-    if token == "\""
-        token = next_token!(parser, "\"")
-        while token != "\""
-            push!(values, token)
-            token = next_token!(parser, "\"")
-        end
-    elseif token == "{"
-        token, counter = token_and_counter!(parser)
-        while counter > 0
-            push!(values, token)
-            token, counter = token_and_counter!(parser, counter)
-        end
-    else
-        push!(values, getkey(parser.substitutions, token, String(token) ) )
-    end
-    token = next_token!(parser, ", or }")
-    if token == "#"
-        value!(parser, values)
-    else
-        token, join(values, " ")
-    end
-end
-
-field!(parser, dict) = begin
-    token = ","
-    while token == ","
-        token = next_token!(parser, "a new entry or }")
-        if token != "}"
-            key = token
-            expect!(parser, "=")
-            token, dict[key] = value!(parser)
-        end
-    end
-    expect(parser, token, "}")
-end
-
-export parse_bibtex
-"""
-    parse_bibtex(text)
-
-This is a simple input parser for BibTex. I had trouble finding a standard
-specification, but I've included several features of real BibTex. Returns
-a preamble (or an empty string) and a dict of dicts.
-
-```jldoctest
-julia> using BibTeX
-
-julia> preamble, result = parse_bibtex(""\"
-            @preamble{some instructions}
-            @comment blah blah
-            @string{short = long}
-            @a{b,
-              c = { {c} c},
-              d = "d d",
-              e = f # short
-            }
-            ""\");
-
-julia> preamble
-"some instructions"
-
-julia> result["b"]["type"]
-"a"
-
-julia> result["b"]["c"]
-"{ c } c"
-
-julia> result["b"]["d"]
-"d d"
-
-julia> result["b"]["e"]
-"f short"
-
-julia> parse_bibtex("@book")
-ERROR: Expected { on line 1
-[...]
-
-julia> parse_bibtex("@book@")
-ERROR: Expected { on line 1
-[...]
-```
-"""
-parse_bibtex(text) = begin
-    parser = parse_text(text)
-    token = next_token_default!(parser)
-    preamble = ""
-    while token != ""
-        if token == "@"
-            record_type = lowercase(next_token!(parser))
-            if record_type == "preamble"
-                trash, preamble = value!(parser)
-            elseif record_type != "comment"
-                expect!(parser, "{")
-                if record_type == "string"
-                    field!(parser, parser.substitutions)
-                else
-                    id = next_token!(parser)
-                    dict = Dict("type" => record_type)
-                    expect!(parser, ",")
-                    field!(parser, dict)
-                    parser.records[id] = dict
-                end
-            end
-        end
-        token = next_token_default!(parser)
-    end
-    preamble, parser.records
-end
+include("parser.jl")
+include("citation.jl")
+include("bibliography.jl")

 end
--- a/src/bibliography.jl
+++ b/src/bibliography.jl
@ -0,0 +1,39 @@
+# Parsed BibTeX database: the preamble text plus a map from citation key to
+# its `Citation`. Subtypes `Associative{String,Citation}`; the dictionary
+# methods defined in this file forward to `data`.
+struct Bibliography <: Associative{String,Citation}
+    preamble::String             # preamble text as returned by `parse_bibtex`
+    data::Dict{String,Citation}  # citation key => entry
+end
+
+"""
+    Bibliography(bibtex::String)
+    Bibliography(io::IO)
+
+Parse BibTeX-format bibliography data from a string (or IO stream) and return
+a `Dict`-like object `b::Bibliography` that behaves as a dictionary mapping
+citation keys (strings) to bibliography items ([`Citation`](@ref)).  The
+preamble text, if any, is kept in `b.preamble`.
+"""
+function Bibliography(bibtex::String)
+    preamble, records = parse_bibtex(bibtex)
+    # Build the dictionary with its final type up front: an untyped
+    # `Dict(generator)` infers a narrower (or, for empty input, unrelated)
+    # type that then has to be converted to `Dict{String,Citation}`.
+    data = Dict{String,Citation}()
+    sizehint!(data, length(records))
+    for (key, fields) in records
+        # `Citation!` removes the "__type__" entry from `fields`.
+        data[key] = Citation!(fields)
+    end
+    return Bibliography(preamble, data)
+end
+# Read the whole stream into a string, then parse it.
+function Bibliography(io::IO)
+    return Bibliography(readstring(io))
+end
+# `open(Bibliography, args...)`: open a stream with `args` and parse its contents.
+function Base.open(::Type{Bibliography}, args...)
+    return open(args...) do stream
+        Bibliography(stream)
+    end
+end
+
+# Container-maintenance methods. `similar` returns an empty bibliography with an
+# empty preamble; the mutating methods forward to `b.data` and return `b`.
+function Base.similar(b::Bibliography)
+    return Bibliography("", Dict{String,Citation}())
+end
+function Base.rehash!(b::Bibliography, n=length(b.data))
+    Base.rehash!(b.data, n)
+    return b
+end
+function Base.sizehint!(b::Bibliography, n)
+    sizehint!(b.data, n)
+    return b
+end
+function Base.empty!(b::Bibliography)
+    empty!(b.data)
+    return b
+end
+# The copy has the same preamble and a copy of the key => entry dictionary.
+function Base.copy(b::Bibliography)
+    return Bibliography(b.preamble, copy(b.data))
+end
+
+# Store citation `v` under key `k` (converted to `String`); returns `b`.
+function Base.setindex!(b::Bibliography, v::Citation, k::AbstractString)
+    setindex!(b.data, v, String(k))
+    return b
+end
+# Look up key `k`, returning `default` when it is absent.
+function Base.get(b::Bibliography, k::AbstractString, default)
+    return get(b.data, String(k), default)
+end
+
+# Iteration protocol and length: delegated to the wrapped dictionary.
+function Base.start(b::Bibliography)
+    return start(b.data)
+end
+function Base.done(b::Bibliography, i)
+    return done(b.data, i)
+end
+function Base.next(b::Bibliography, i)
+    return next(b.data, i)
+end
+function Base.length(b::Bibliography)
+    return length(b.data)
+end
+
+# todo: add specialized Base.show methods for MIME"text/bibtex" etc.
--- a/src/citation.jl
+++ b/src/citation.jl
@ -0,0 +1,42 @@
+"""
+    Citation{S}(data::Dict{String,String})
+    Citation{S}()
+
+A bibliography item in a BibTeX database, stored as a dictionary from field
+names to field values.  It is parameterized by a symbol `S` giving the type
+of the item (`:article`, etc.).  A `b::Citation` supports `b[key]` access to
+retrieve the data and in general acts like a dictionary from `String` to
+`String`.  `Citation{S}()` creates an item with no fields.
+"""
+struct Citation{S} <: Associative{String,String}
+    data::Dict{String,String}
+end
+Citation{S}() where {S} = Citation{S}(Dict{String,String}())
+
+# Build a `Citation` from a record dictionary. The record's "__type__" entry is
+# removed from `data` (hence the `!`) and becomes the type parameter; the
+# remaining entries are the citation's fields.
+function Citation!(data::Dict{String,String})
+    kind = pop!(data, "__type__")
+    return Citation{Symbol(kind)}(data)
+end
+
+# Container-maintenance methods. `similar` returns an empty citation of the same
+# item type; the mutating methods forward to `b.data` and return `b`.
+function Base.similar(b::Citation{S}) where {S}
+    return Citation{S}(Dict{String,String}())
+end
+function Base.rehash!(b::Citation, n=length(b.data))
+    Base.rehash!(b.data, n)
+    return b
+end
+function Base.sizehint!(b::Citation, n)
+    sizehint!(b.data, n)
+    return b
+end
+function Base.empty!(b::Citation)
+    empty!(b.data)
+    return b
+end
+# The copy keeps the item type and gets its own copy of the field dictionary.
+function Base.copy(b::Citation{S}) where {S}
+    return Citation{S}(copy(b.data))
+end
+
+# Field access. Keys (and stored values) are converted to `String` first.
+function Base.get(b::Citation, k::AbstractString, default)
+    return get(b.data, String(k), default)
+end
+function Base.getindex(b::Citation, k::AbstractString)
+    return b.data[String(k)]
+end
+function Base.setindex!(b::Citation, v::AbstractString, k::AbstractString)
+    setindex!(b.data, String(v), String(k))
+    return b
+end
+
+# Iteration protocol and length: delegated to the wrapped field dictionary.
+function Base.start(b::Citation)
+    return start(b.data)
+end
+function Base.done(b::Citation, i)
+    return done(b.data, i)
+end
+function Base.next(b::Citation, i)
+    return next(b.data, i)
+end
+function Base.length(b::Citation)
+    return length(b.data)
+end
+
+# Compact one-line display, e.g. `Citation{:article}(1 entries)`.
+# Written with `where {S}` like the other parametric methods in this file;
+# the older `f{S}(...)` method syntax is deprecated.
+function Base.show(io::IO, b::Citation{S}) where {S}
+    print(io, "Citation{:$S}(", length(b), " entries)")
+end
+
+# TODO: add Base.show text/plain and text/markdown for formatted citation
--- a/src/parser.jl
+++ b/src/parser.jl
@ -0,0 +1,168 @@
+# Parser state shared by the tokenizing and parsing functions in this file.
+struct Parser{T}
+    tokens::T                                    # remaining tokens, consumed from the front
+    substitutions::Dict{String, String}          # `@string` definitions
+    records::Dict{String, Dict{String, String}}  # citation key => fields
+    # Concrete `RefValue` (what `Ref(1)` returns) rather than the abstract
+    # `Ref`, so the field is concretely typed. The counter is mutated in place
+    # while the struct itself stays immutable.
+    line::Base.RefValue{Int}                     # current line number
+end
+
+# Token type of the parser: the element type of its token collection.
+function Base.eltype(p::Parser)
+    return eltype(p.tokens)
+end
+# The empty token; the token readers use it as the end-of-input marker.
+function Base.one(p::Parser)
+    return eltype(p)("")
+end
+
+# Outer constructor: infer the type parameter from `tokens`.
+function Parser(tokens::T, substitutions, records, line) where T
+    return Parser{T}(tokens, substitutions, records, line)
+end
+
+# Tokenize `text` and wrap the tokens in a fresh `Parser` positioned at line 1.
+# Tokens are runs of ordinary characters, the single characters `" # { } @ , =`,
+# and newlines (kept as tokens so that line numbers can be tracked).
+parse_text(text) = begin
+    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
+    # `records` maps each citation key to its own field dictionary, so it must
+    # be a dict of dicts (an empty `Dict{String, String}` only worked because
+    # the `Parser` constructor converted it).
+    Parser(tokens, Dict{String, String}(), Dict{String, Dict{String, String}}(), Ref(1))
+end
+
+# Position suffix for error messages, e.g. "on line 3". Reads the line counter
+# through the public `[]` accessor instead of the `Ref`'s internal `.x` field.
+location(parser) = "on line $(parser.line[])"
+
+# Pop and return the next non-newline token, or the empty token (`one(parser)`)
+# when the input is exhausted. Every newline token skipped bumps the line
+# counter. Written as a loop rather than self-recursion so that long runs of
+# blank lines cannot exhaust the stack.
+next_token_default!(parser) = begin
+    while !isempty(parser.tokens)
+        token = shift!(parser.tokens)
+        if token == "\n"
+            parser.line[] += 1
+        else
+            return token
+        end
+    end
+    one(parser)
+end
+
+# Like `next_token_default!`, but running out of input is an error. `eol`
+# names what was expected and is interpolated into the error message.
+function next_token!(parser, eol = "additional tokens")
+    token = next_token_default!(parser)
+    token == "" && error("Expected $eol $(location(parser))")
+    return token
+end
+
+# Raise a parse error unless `result` equals `expectation`.
+function expect(parser, result, expectation)
+    result == expectation || error("Expected $expectation $(location(parser))")
+    return nothing
+end
+
+# Consume the next token and require it to equal `expectation`.
+function expect!(parser, expectation)
+    token = next_token!(parser, expectation)
+    return expect(parser, token, expectation)
+end
+
+# Read one token inside a braced value and return it together with the updated
+# brace depth: `{` raises the depth by one, `}` lowers it by one.
+function token_and_counter!(parser, bracket_counter = 1)
+    token = next_token!(parser, "}")
+    delta = token == "{" ? 1 : (token == "}" ? -1 : 0)
+    return token, bracket_counter + delta
+end
+
+# Parse one field value and return `(terminator, value)`, where `terminator` is
+# the token that follows the value. A value is a quoted string, a braced group
+# (nested braces are kept), or a bare token. Pieces joined with `#` are
+# accumulated in `values` by recursion, and all collected tokens are joined
+# with single spaces.
+value!(parser, values = eltype(parser)[]) = begin
+    token = next_token!(parser)
+    if token == "\""
+        token = next_token!(parser, "\"")
+        while token != "\""
+            push!(values, token)
+            token = next_token!(parser, "\"")
+        end
+    elseif token == "{"
+        token, counter = token_and_counter!(parser)
+        while counter > 0
+            push!(values, token)
+            token, counter = token_and_counter!(parser, counter)
+        end
+    else
+        # A bare token is either the name of an `@string` definition or a
+        # literal such as a number. Look up the *value* (`get`, not `getkey`,
+        # which would hand back the name itself). `field!` stores names in
+        # lowercase, so the lookup is lowercased too.
+        push!(values, get(parser.substitutions, lowercase(token), String(token)))
+    end
+    token = next_token!(parser, ", or }")
+    if token == "#"
+        value!(parser, values)
+    else
+        token, join(values, " ")
+    end
+end
+
+# Parse comma-separated `key = value` pairs into `dict` until the closing `}`.
+# Keys are stored in lowercase. A trailing comma before `}` is accepted.
+field!(parser, dict) = begin
+    token = ","
+    while token == ","
+        token = next_token!(parser, "a new entry or }")
+        if token != "}"
+            key = token
+            expect!(parser, "=")
+            # `value!` also returns the token after the value (`,` or `}`),
+            # which drives the loop condition.
+            token, dict[lowercase(key)] = value!(parser)
+        end
+    end
+    expect(parser, token, "}")
+end
+
+"""
+    parse_bibtex(text)
+
+This is a simple input parser for BibTeX. I had trouble finding a standard
+specification, but I've included several features of real BibTeX. Returns
+a preamble (or an empty string) and a dict of dicts, keyed by citation key.
+
+Each record's dict maps lowercased field names to values and stores the
+lowercased entry type under `"__type__"`. Names defined with `@string` are
+substituted when they appear as bare tokens in a value. `@comment` is skipped.
+
+```jldoctest
+julia> using BibTeX: parse_bibtex
+
+julia> preamble, result = parse_bibtex(""\"
+            @preamble{some instructions}
+            @comment blah blah
+            @string{short = long}
+            @a{b,
+              c = { {c} c},
+              d = "d d",
+              e = f # short
+            }
+            ""\");
+
+julia> preamble
+"some instructions"
+
+julia> result["b"]["__type__"]
+"a"
+
+julia> result["b"]["c"]
+"{ c } c"
+
+julia> result["b"]["d"]
+"d d"
+
+julia> result["b"]["e"]
+"f long"
+
+julia> parse_bibtex("@book")
+ERROR: Expected { on line 1
+[...]
+
+julia> parse_bibtex("@book@")
+ERROR: Expected { on line 1
+[...]
+```
+"""
+parse_bibtex(text) = begin
+    parser = parse_text(text)
+    token = next_token_default!(parser)
+    preamble = ""
+    while token != ""
+        # Anything outside an `@...` record is ignored.
+        if token == "@"
+            record_type = lowercase(next_token!(parser))
+            if record_type == "preamble"
+                # `value!` also consumes the token that follows the preamble
+                # (often the `@` of the next record). Re-examine that token
+                # instead of discarding it, or the next record would be skipped.
+                token, preamble = value!(parser)
+                continue
+            elseif record_type != "comment"
+                expect!(parser, "{")
+                if record_type == "string"
+                    field!(parser, parser.substitutions)
+                else
+                    id = next_token!(parser)
+                    dict = Dict("__type__" => record_type)
+                    expect!(parser, ",")
+                    field!(parser, dict)
+                    parser.records[id] = dict
+                end
+            end
+        end
+        token = next_token_default!(parser)
+    end
+    preamble, parser.records
+end
--- a/test/benchmark.jl
+++ b/test/benchmark.jl
@ -3,4 +3,4 @@ const file = joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bi
 using BenchmarkTools
 using BibTeX

-@benchmark parse_bibtex(file)
+# Benchmark parsing the file's *contents*: `file` is only the path, and parsing
+# the path string itself would measure almost nothing. The `$(...)`
+# interpolation reads the file once, outside the timed code.
+@benchmark BibTeX.parse_bibtex($(readstring(file)))
--- a/test/runtests.jl
+++ b/test/runtests.jl
@ -1,4 +1,4 @@
-using BibTeX
+using BibTeX, Base.Test

 import Documenter
 Documenter.makedocs(
@ -13,5 +13,37 @@ Documenter.makedocs(
    authors = "Brandon Taylor"
 )

-# just test if it parses (for now)
-joinpath((@__FILE__) |> dirname |> dirname, "example", "examples.bib") |> readstring |> parse_bibtex
+@testset "examples.bib" begin
+    # Resolve the example file relative to this test file rather than the
+    # current working directory, so the test passes wherever it is run from.
+    path = joinpath(dirname(@__FILE__), "..", "example", "examples.bib")
+    b = open(Bibliography, path, "r")
+    @test length(b) == 92
+    # Entry types and field names are lowercased by the parser, so the
+    # `@Article` / `Date` spellings in the file are found as `:article` / "date".
+    @test (b["angenendt"]::Citation{:article})["date"] == "2002"
+end
+
+# Exercises the dictionary-style interface of `Bibliography` and `Citation`
+# on a two-entry database built from an inline string.
+@testset "small bib" begin
+    b = Bibliography("""
+    @article{foo, bar=baz}
+    @book{bar, foobar=1}
+    """)
+    # "foobar" is a field name, not a citation key; "blah" is not a field of foo.
+    @test get(b, "foobar", nothing) === nothing
+    @test get(b["foo"], "blah", nothing) === nothing
+
+    @test string(b["foo"]) == "Citation{:article}(1 entries)"
+
+    # Bibliography: rehash!, copy, empty!, sizehint!, similar.
+    Base.rehash!(b)
+    b2 = copy(b)
+    @test length(b2) == length(b)
+    @test isempty(sizehint!(empty!(b2),10))
+    @test isempty(similar(b))
+    # Insert a fresh entry into the now-empty copy, then set one field on it.
+    b2["x"] = Citation{:foo}()
+    b2["x"]["bar"] = "blah"
+    @test length(b2) == length(b2["x"]) == 1
+    @test b2["x"]["bar"] == "blah"
+    @test get(b2["x"], "foo", nothing) === nothing
+    # Iteration yields key => value pairs for both container types.
+    @test collect(b2)[1][2] == b2["x"]
+    @test collect(b2["x"])[1] == ("bar"=>"blah")
+    # Citation: rehash!, copy (item type preserved), similar, empty!, sizehint!.
+    Base.rehash!(b2["x"])
+    x2 = copy(b2["x"])::Citation{:foo}
+    @test length(x2) == 1
+    @test isempty(similar(x2))
+    @test isempty(sizehint!(empty!(x2),10))
+end