added in parser

2017-07-30 11:02:08 -04:00 · 2017-07-30 11:02:08 -04:00 · 10d0eebc31
parent e8e0983528
commit 10d0eebc31
1 changed files with 133 additions and 7 deletions
--- a/src/BibTeX.jl
+++ b/src/BibTeX.jl
@ -1,17 +1,143 @@
 module BibTeX

-"""
-    test_function()
+"""
+    Parser
+
+Parsing state shared by the helper functions in this module.
+
+# Fields
+- `tokens`: tokens that have not been consumed yet, in input order.
+- `substitutions`: dictionary that `parse_bibtex` fills from `@string` records.
+- `records`: parsed records keyed by record id; each value maps field names to values.
+- `line`: boxed counter holding the current line number, used in error messages.
+"""
+struct Parser
+    tokens::Vector{String}
+    substitutions::Dict{String, String}
+    records::Dict{String, Dict{String, String}}
+    # `Ref{Int}` is abstract; `Base.RefValue{Int}` is the concrete type `Ref(1)` returns,
+    # so the field stays inferrable.
+    line::Base.RefValue{Int}
+end

-Return 1
+"""
+    Parser(text)
+
+Tokenize `text` and return a fresh `Parser` whose line counter starts at 1.
+
+`%` comments are stripped first. Every remaining run of characters other than
+whitespace and the delimiters `"`, `#`, `{`, `}`, `@`, `,`, `=` becomes one token;
+each delimiter and each newline is a token of its own.
+"""
+Parser(text) = begin
+    # Remove the comment text only and leave the newline that ends it in place, so
+    # line counting is unaffected. Not requiring a trailing newline also strips a
+    # comment that sits on the last line of the input.
+    without_comments = replace(text, r"%.*", "")
+    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", without_comments)
+    # `records` is built with the value type declared in the struct instead of
+    # relying on an implicit conversion of an empty `Dict{String, String}`.
+    Parser(tokens, Dict{String, String}(), Dict{String, Dict{String, String}}(), Ref(1))
+end
+
+# Describe the parser's current position for use in error messages.
+# `line[]` is the supported way to read a `Ref`; `.x` is an internal field.
+location(parser) = "on line $(parser.line[])"
+
+# Remove and return the next token that is not a newline. Every newline skipped on
+# the way increments the parser's line counter. `eol` names what was expected and
+# appears in the error raised when the tokens run out.
+function next_token!(parser, eol = "additional tokens")
+    while true
+        if isempty(parser.tokens)
+            error("Expected $eol $(location(parser))")
+        end
+        candidate = shift!(parser.tokens)
+        if candidate != "\n"
+            return candidate
+        end
+        parser.line.x += 1
+    end
+end
+
+# Check that `result` equals `expectation`. Returns `nothing` on success and raises
+# an error naming the expected token and the current line otherwise.
+function expect(parser, result, expectation)
+    if result == expectation
+        return nothing
+    end
+    error("Expected $expectation $(location(parser))")
+end
+
+# Consume the next token and require it to equal `expectation`. The same string is
+# used as the "expected" text if the input ends before a token is found.
+function expect!(parser, expectation)
+    token = next_token!(parser, expectation)
+    return expect(parser, token, expectation)
+end
+
+"""
+    value!(parser, values = String[])
+
+Read one field value from the token stream and return `(token, value)`, where
+`token` is the first token after the value and `value` is the collected pieces
+joined with single spaces.
+
+A value is a `"`-delimited string, a brace-delimited group (nested braces are
+matched but not kept in the result), or a bare word. A bare word is replaced by
+its entry in `parser.substitutions` when one exists and kept as written otherwise.
+Values separated by `#` are concatenated; `values` carries the pieces so far.
+"""
+value!(parser, values = String[]) = begin
+    token = next_token!(parser)
+    if token == "\""
+        # Quoted string: collect every token up to the closing quote.
+        token = next_token!(parser, "\"")
+        while token != "\""
+            push!(values, token)
+            token = next_token!(parser, "\"")
+        end
+    elseif token == "{"
+        # Braced group: track nesting depth; the braces themselves are dropped.
+        bracket_counter = 1
+        while bracket_counter > 0
+            token = next_token!(parser, "}")
+            if token == "{"
+                bracket_counter += 1
+            elseif token == "}"
+                bracket_counter -= 1
+            else
+                push!(values, token)
+            end
+        end
+    else
+        # `get`, not `getkey`: `getkey` returns the key itself, so the substitution
+        # would never be applied.
+        push!(values, get(parser.substitutions, token, token))
+    end
+    token = next_token!(parser, ", or }")
+    if token == "#"
+        # `#` concatenates: keep reading into the same `values`.
+        value!(parser, values)
+    else
+        token, join(values, " ")
+    end
+end
+
+"""
+    field!(parser, dict)
+
+Read comma-separated `key = value` pairs into `dict` until the closing `}`.
+A trailing comma before the `}` is accepted. An error is raised if the list ends
+with any token other than `}`.
+"""
+field!(parser, dict) = begin
+    # Start as if a comma had just been read so the loop body runs at least once.
+    token = ","
+    while token == ","
+        token = next_token!(parser, "a new entry or }")
+        if token != "}"
+            key = token
+            expect!(parser, "=")
+            # `value!` also returns the token that follows the value (`,` or `}`).
+            token, dict[key] = value!(parser)
+        end
+    end
+    expect(parser, token, "}")
+end
+
+"""
+    parse_bibtex(text)
+
+Parse the BibTeX source `text` and return a dictionary that maps each record id
+to a dictionary of that record's fields. The record type is stored in the same
+dictionary under the key `"type"`.
+
+This is a simple parser for BibTeX. I had trouble finding a standard
+specification, but I've included several features of real BibTeX: `%` comments,
+`@comment` and `@preamble` records (skipped), `@string` substitutions, nested
+braces, and concatenation with `#`.

 ```jldoctest
-julia> import BibTeX
+julia> import BibTeX: parse_bibtex
+
+julia> result = parse_bibtex(""\"
+            @comment blah blah
+            @string{short = long}
+            @a{b,
+              c = {c {c}}, % blah blah
+              d = "d d",
+              e = f # short
+            }
+            ""\");

-julia> BibTeX.test_function()
-2
+julia> result["b"]["type"]
+"a"
+
+julia> result["b"]["c"]
+"c c"
+
+julia> result["b"]["d"]
+"d d"
+
+julia> result["b"]["e"]
+"f long"
+
+julia> parse_bibtex("@book")
+ERROR: Expected { on line 1
+[...]
+
+julia> parse_bibtex("@book@")
+ERROR: Expected { on line 1
+[...]
 ```
 """
-test_function() = 1
+parse_bibtex(text) = begin
+    parser = Parser(text)
+    while !isempty(parser.tokens)
+        token = shift!(parser.tokens)
+        if token == "\n"
+            # Tokens are taken here without `next_token!`, so newlines between
+            # records must be counted explicitly; otherwise error messages report
+            # the wrong line.
+            parser.line[] += 1
+        elseif token == "@"
+            record_type = next_token!(parser)
+            # `@comment` and `@preamble` are not parsed: their tokens are skipped
+            # by this loop until the next `@`.
+            if !(record_type in ["comment", "preamble"])
+                expect!(parser, "{")
+                if record_type == "string"
+                    # `@string{name = value}` defines substitutions for later values.
+                    field!(parser, parser.substitutions)
+                else
+                    id = next_token!(parser)
+                    dict = Dict("type" => record_type)
+                    expect!(parser, ",")
+                    field!(parser, dict)
+                    parser.records[id] = dict
+                end
+            end
+        end
+    end
+    parser.records
+end

 end