fixed some issues

This commit is contained in:
Brandon Taylor 2017-07-31 01:01:32 -04:00
parent 687aea8d99
commit 19b3a8804e
1 changed files with 56 additions and 40 deletions

View File

@ -1,33 +1,47 @@
module BibTeX module BibTeX
struct Parser struct Parser{T}
tokens::Vector{String} tokens::T
substitutions::Dict{String, String} substitutions::Dict{String, String}
records::Dict{String, Dict{String, String}} records::Dict{String, Dict{String, String}}
line::Ref{Int} line::Ref{Int}
end end
Parser(text) = begin Base.eltype(p::Parser) = eltype(p.tokens)
without_comments = replace(text, r"%.*\n", "\n") Base.one(p::Parser) = eltype(p)("")
tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", without_comments)
Parser(tokens::T, substitutions, records, line) where T =
Parser{T}(tokens, substitutions, records, line)
parse_text(text) = begin
tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1)) Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
end end
location(parser) = "on line $(parser.line.x)" location(parser) = "on line $(parser.line.x)"
next_token!(parser, eol = "additional tokens") = next_token_default!(parser) =
if length(parser.tokens) < 1 if isempty(parser.tokens)
error("Expected $eol $(location(parser))") one(parser)
else else
result = shift!(parser.tokens) result = shift!(parser.tokens)
if result == "\n" if result == "\n"
parser.line.x = parser.line.x + 1 parser.line.x = parser.line.x + 1
next_token!(parser, eol) next_token_default!(parser)
else else
result result
end end
end end
next_token!(parser, eol = "additional tokens") = begin
result = next_token_default!(parser)
if result == ""
error("Expected $eol $(location(parser))")
else
result
end
end
expect(parser, result, expectation) = expect(parser, result, expectation) =
if result != expectation if result != expectation
error("Expected $expectation $(location(parser))") error("Expected $expectation $(location(parser))")
@ -35,7 +49,17 @@ expect(parser, result, expectation) =
expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation) expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
value!(parser, values = String[]) = begin token_and_counter!(parser, bracket_counter = 1) = begin
token = next_token!(parser, "}")
if token == "{"
bracket_counter += 1
elseif token == "}"
bracket_counter -= 1
end
token, bracket_counter
end
value!(parser, values) = begin
token = next_token!(parser) token = next_token!(parser)
if token == "\"" if token == "\""
token = next_token!(parser, "\"") token = next_token!(parser, "\"")
@ -44,19 +68,13 @@ value!(parser, values = String[]) = begin
token = next_token!(parser, "\"") token = next_token!(parser, "\"")
end end
elseif token == "{" elseif token == "{"
bracket_counter = 1 token, counter = token_and_counter!(parser)
while bracket_counter > 0 while counter > 0
token = next_token!(parser, "}") push!(values, token)
if token == "{" token, counter = token_and_counter!(parser, counter)
bracket_counter += 1
elseif token == "}"
bracket_counter -= 1
else
push!(values, token)
end
end end
else else
push!(values, getkey(parser.substitutions, token, token) ) push!(values, getkey(parser.substitutions, token, String(token) ) )
end end
token = next_token!(parser, ", or }") token = next_token!(parser, ", or }")
if token == "#" if token == "#"
@ -73,7 +91,7 @@ field!(parser, dict) = begin
if token != "}" if token != "}"
key = token key = token
expect!(parser, "=") expect!(parser, "=")
token, dict[key] = value!(parser) token, dict[key] = value!(parser, eltype(parser)[])
end end
end end
expect(parser, token, "}") expect(parser, token, "}")
@ -83,17 +101,16 @@ export parse_bibtex
""" """
parse_bibtex(text) parse_bibtex(text)
This is a simple, input parser for BibTex. I had trouble finding a standard This is a simple input parser for BibTex. I had trouble finding a standard
specification, but I've included several features of real BibTex. specification, but I've included several features of real BibTex.
```jldoctest ```jldoctest
julia> using BibTeX julia> using BibTeX
julia> result = parse_bibtex(""\" julia> result = parse_bibtex(""\"
@comment blah blah
@string{short = long} @string{short = long}
@a{b, @a{b,
c = {c {c}}, % blah blah c = { {c} c},
d = "d d", d = "d d",
e = f # short e = f # short
} }
@ -103,7 +120,7 @@ julia> result["b"]["type"]
"a" "a"
julia> result["b"]["c"] julia> result["b"]["c"]
"c c" "{ c } c"
julia> result["b"]["d"] julia> result["b"]["d"]
"d d" "d d"
@ -121,24 +138,23 @@ ERROR: Expected { on line 1
``` ```
""" """
parse_bibtex(text) = begin parse_bibtex(text) = begin
parser = Parser(text) parser = parse_text(text)
while !isempty(parser.tokens) token = next_token_default!(parser)
token = shift!(parser.tokens) while token != ""
if token == "@" if token == "@"
record_type = next_token!(parser) record_type = next_token!(parser)
if !(record_type in ["comment", "preamble"]) expect!(parser, "{")
expect!(parser, "{") if lowercase(record_type) == "string"
if record_type == "string" field!(parser, parser.substitutions)
field!(parser, parser.substitutions) else
else id = next_token!(parser)
id = next_token!(parser) dict = Dict("type" => record_type)
dict = Dict("type" => record_type) expect!(parser, ",")
expect!(parser, ",") field!(parser, dict)
field!(parser, dict) parser.records[id] = dict
parser.records[id] = dict
end
end end
end end
token = next_token_default!(parser)
end end
parser.records parser.records
end end