avoid inserting extraneous spaces

2017-08-02 20:04:31 -04:00 · 2017-08-02 20:04:31 -04:00 · 88c6e10e83
parent 75d1b6d74c
commit 88c6e10e83
1 changed file with 21 additions and 11 deletions
--- a/src/parser.jl
+++ b/src/parser.jl
@@ -12,7 +12,7 @@ Parser(tokens::T, substitutions, records, line) where T =
    Parser{T}(tokens, substitutions, records, line)

 parse_text(text) = begin
-    tokens = matchall(r"[^\s\n\"#{}@,=]+|\n|\"|#|{|}|@|,|=", text)
+    tokens = matchall(r"[^\s\"#{}@,=]+|\s+|\"|#|{|}|@|,|=", text)
    Parser(tokens, Dict{String, String}(), Dict{String, String}(), Ref(1))
 end

@@ -23,15 +23,15 @@ next_token_default!(parser) =
        one(parser)
    else
        result = shift!(parser.tokens)
-        if result == "\n"
-            parser.line.x = parser.line.x + 1
-            next_token_default!(parser)
+        parser.line.x = parser.line.x + count(x -> x == '\n', result)
+        if all(isspace, result)
+            eltype(parser)(" ")
        else
            result
        end
    end

-next_token!(parser, eol = "additional tokens") = begin
+next_token_with_space!(parser, eol = "additional tokens") = begin
    result = next_token_default!(parser)
    if result == ""
        error("Expected $eol $(location(parser))")
@@ -40,6 +40,15 @@ next_token!(parser, eol = "additional tokens") = begin
    end
 end

+next_token!(parser, eol = "additional tokens") = begin
+    result = next_token_with_space!(parser, eol)
+    if all(isspace, result)
+        next_token_with_space!(parser, eol)
+    else
+        result
+    end
+end
+
 expect(parser, result, expectation) =
    if result != expectation
        error("Expected $expectation $(location(parser))")
@@ -48,7 +57,7 @@ expect(parser, result, expectation) =
 expect!(parser, expectation) = expect(parser, next_token!(parser, expectation), expectation)

 token_and_counter!(parser, bracket_counter = 1) = begin
-    token = next_token!(parser, "}")
+    token = next_token_with_space!(parser, "}")
    if token == "{"
        bracket_counter += 1
    elseif token == "}"
@@ -60,10 +69,10 @@ end
 value!(parser, values = eltype(parser)[]) = begin
    token = next_token!(parser)
    if token == "\""
-        token = next_token!(parser, "\"")
+        token = next_token_with_space!(parser, "\"")
        while token != "\""
            push!(values, token)
-            token = next_token!(parser, "\"")
+            token = next_token_with_space!(parser, "\"")
        end
    elseif token == "{"
        token, counter = token_and_counter!(parser)
@@ -76,9 +85,10 @@ value!(parser, values = eltype(parser)[]) = begin
    end
    token = next_token!(parser, ", or }")
    if token == "#"
+        push!(values, " ")
        value!(parser, values)
    else
-        token, join(values, " ")
+        token, join(values)
    end
 end

@@ -114,7 +124,7 @@ julia> preamble, result = parse_bibtex(""\"
            @comment blah blah
            @string{short = long}
            @a{b,
-              c = { {c} c},
+              c = {{c} c},
              d = "d d",
              e = f # short
            }
@@ -127,7 +137,7 @@ julia> result["b"]["__type__"]
 "a"

 julia> result["b"]["c"]
-"{ c } c"
+"{c} c"

 julia> result["b"]["d"]
 "d d"