fixed some issues
This commit is contained in:
parent
687aea8d99
commit
19b3a8804e
|
@ -1,33 +1,47 @@
|
||||||
module BibTeX
|
module BibTeX
|
||||||
|
|
||||||
struct Parser
|
struct Parser{T}
|
||||||
tokens::Vector{String}
|
tokens::T
|
||||||
substitutions::Dict{String, String}
|
substitutions::Dict{String, String}
|
||||||
records::Dict{String, Dict{String, String}}
|
records::Dict{String, Dict{String, String}}
|
||||||
line::Ref{Int}
|
line::Ref{Int}
|
||||||
end
|
end
|
||||||
|
|
||||||
Parser(text) = begin
|
Base.eltype(p::Parser) = eltype(p.tokens)
|
||||||
without_comments = replace(text, r"%.*\n", "\n")
|
Base.one(p::Parser) = eltype(p)("")
|
||||||
tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", without_comments)
|
|
||||||
|
Parser(tokens::T, substitutions, records, line) where T =
|
||||||
|
Parser{T}(tokens, substitutions, records, line)
|
||||||
|
|
||||||
|
parse_text(text) = begin
|
||||||
|
tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
|
||||||
Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
|
Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
|
||||||
end
|
end
|
||||||
|
|
||||||
location(parser) = "on line $(parser.line.x)"
|
location(parser) = "on line $(parser.line.x)"
|
||||||
|
|
||||||
next_token!(parser, eol = "additional tokens") =
|
next_token_default!(parser) =
|
||||||
if length(parser.tokens) < 1
|
if isempty(parser.tokens)
|
||||||
error("Expected $eol $(location(parser))")
|
one(parser)
|
||||||
else
|
else
|
||||||
result = shift!(parser.tokens)
|
result = shift!(parser.tokens)
|
||||||
if result == "\n"
|
if result == "\n"
|
||||||
parser.line.x = parser.line.x + 1
|
parser.line.x = parser.line.x + 1
|
||||||
next_token!(parser, eol)
|
next_token_default!(parser)
|
||||||
else
|
else
|
||||||
result
|
result
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
next_token!(parser, eol = "additional tokens") = begin
|
||||||
|
result = next_token_default!(parser)
|
||||||
|
if result == ""
|
||||||
|
error("Expected $eol $(location(parser))")
|
||||||
|
else
|
||||||
|
result
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
expect(parser, result, expectation) =
|
expect(parser, result, expectation) =
|
||||||
if result != expectation
|
if result != expectation
|
||||||
error("Expected $expectation $(location(parser))")
|
error("Expected $expectation $(location(parser))")
|
||||||
|
@ -35,7 +49,17 @@ expect(parser, result, expectation) =
|
||||||
|
|
||||||
expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
|
expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)
|
||||||
|
|
||||||
value!(parser, values = String[]) = begin
|
token_and_counter!(parser, bracket_counter = 1) = begin
|
||||||
|
token = next_token!(parser, "}")
|
||||||
|
if token == "{"
|
||||||
|
bracket_counter += 1
|
||||||
|
elseif token == "}"
|
||||||
|
bracket_counter -= 1
|
||||||
|
end
|
||||||
|
token, bracket_counter
|
||||||
|
end
|
||||||
|
|
||||||
|
value!(parser, values) = begin
|
||||||
token = next_token!(parser)
|
token = next_token!(parser)
|
||||||
if token == "\""
|
if token == "\""
|
||||||
token = next_token!(parser, "\"")
|
token = next_token!(parser, "\"")
|
||||||
|
@ -44,19 +68,13 @@ value!(parser, values = String[]) = begin
|
||||||
token = next_token!(parser, "\"")
|
token = next_token!(parser, "\"")
|
||||||
end
|
end
|
||||||
elseif token == "{"
|
elseif token == "{"
|
||||||
bracket_counter = 1
|
token, counter = token_and_counter!(parser)
|
||||||
while bracket_counter > 0
|
while counter > 0
|
||||||
token = next_token!(parser, "}")
|
push!(values, token)
|
||||||
if token == "{"
|
token, counter = token_and_counter!(parser, counter)
|
||||||
bracket_counter += 1
|
|
||||||
elseif token == "}"
|
|
||||||
bracket_counter -= 1
|
|
||||||
else
|
|
||||||
push!(values, token)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
push!(values, getkey(parser.substitutions, token, token) )
|
push!(values, getkey(parser.substitutions, token, String(token) ) )
|
||||||
end
|
end
|
||||||
token = next_token!(parser, ", or }")
|
token = next_token!(parser, ", or }")
|
||||||
if token == "#"
|
if token == "#"
|
||||||
|
@ -73,7 +91,7 @@ field!(parser, dict) = begin
|
||||||
if token != "}"
|
if token != "}"
|
||||||
key = token
|
key = token
|
||||||
expect!(parser, "=")
|
expect!(parser, "=")
|
||||||
token, dict[key] = value!(parser)
|
token, dict[key] = value!(parser, eltype(parser)[])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
expect(parser, token, "}")
|
expect(parser, token, "}")
|
||||||
|
@ -83,17 +101,16 @@ export parse_bibtex
|
||||||
"""
|
"""
|
||||||
parse_bibtex(text)
|
parse_bibtex(text)
|
||||||
|
|
||||||
This is a simple, input parser for BibTex. I had trouble finding a standard
|
This is a simple input parser for BibTex. I had trouble finding a standard
|
||||||
specification, but I've included several features of real BibTex.
|
specification, but I've included several features of real BibTex.
|
||||||
|
|
||||||
```jldoctest
|
```jldoctest
|
||||||
julia> using BibTeX
|
julia> using BibTeX
|
||||||
|
|
||||||
julia> result = parse_bibtex(""\"
|
julia> result = parse_bibtex(""\"
|
||||||
@comment blah blah
|
|
||||||
@string{short = long}
|
@string{short = long}
|
||||||
@a{b,
|
@a{b,
|
||||||
c = {c {c}}, % blah blah
|
c = { {c} c},
|
||||||
d = "d d",
|
d = "d d",
|
||||||
e = f # short
|
e = f # short
|
||||||
}
|
}
|
||||||
|
@ -103,7 +120,7 @@ julia> result["b"]["type"]
|
||||||
"a"
|
"a"
|
||||||
|
|
||||||
julia> result["b"]["c"]
|
julia> result["b"]["c"]
|
||||||
"c c"
|
"{ c } c"
|
||||||
|
|
||||||
julia> result["b"]["d"]
|
julia> result["b"]["d"]
|
||||||
"d d"
|
"d d"
|
||||||
|
@ -121,24 +138,23 @@ ERROR: Expected { on line 1
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
parse_bibtex(text) = begin
|
parse_bibtex(text) = begin
|
||||||
parser = Parser(text)
|
parser = parse_text(text)
|
||||||
while !isempty(parser.tokens)
|
token = next_token_default!(parser)
|
||||||
token = shift!(parser.tokens)
|
while token != ""
|
||||||
if token == "@"
|
if token == "@"
|
||||||
record_type = next_token!(parser)
|
record_type = next_token!(parser)
|
||||||
if !(record_type in ["comment", "preamble"])
|
expect!(parser, "{")
|
||||||
expect!(parser, "{")
|
if lowercase(record_type) == "string"
|
||||||
if record_type == "string"
|
field!(parser, parser.substitutions)
|
||||||
field!(parser, parser.substitutions)
|
else
|
||||||
else
|
id = next_token!(parser)
|
||||||
id = next_token!(parser)
|
dict = Dict("type" => record_type)
|
||||||
dict = Dict("type" => record_type)
|
expect!(parser, ",")
|
||||||
expect!(parser, ",")
|
field!(parser, dict)
|
||||||
field!(parser, dict)
|
parser.records[id] = dict
|
||||||
parser.records[id] = dict
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
token = next_token_default!(parser)
|
||||||
end
|
end
|
||||||
parser.records
|
parser.records
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue