fix code chunk line numbers

pull/331/head
Shuhei Kadowaki 2020-05-16 19:52:24 +09:00
parent 11c720ad7f
commit 2a6ee38850
6 changed files with 61 additions and 76 deletions

View File

@ -26,7 +26,7 @@ function restore_chunk(chunk::CodeChunk, cached)
# Chunk types don't match after loading; fix by constructing new chunks
# from the loaded content.
new_chunks = Any[]
new_chunks = []
for c in chunks
newc = CodeChunk(c.content, c.number, c.start_line, c.optionstring, c.options)
newc.result_no = c.result_no

View File

@ -1,23 +1,49 @@
"""
parse_markdown(document_body, is_pandoc = false)::Vector{WeaveChunk}
parse_markdown(document_body, code_start, code_end)::Vector{WeaveChunk}
Parses Weave markdown and returns [`WeaveChunk`](@ref)s.
"""
function parse_markdown end
function parse_markdown(document_body, is_pandoc = false)::Vector{WeaveChunk}
code_start, code_end = if is_pandoc
r"^<<(?<options>.*?)>>=\s*$",
r"^@\s*$"
function parse_markdown(document_body; is_pandoc = false)
if is_pandoc
header = Dict()
offset = 0
code_start = r"^<<(?<options>.*?)>>=\s*$"
code_end = r"^@\s*$"
else
r"^[`~]{3}(?:\{?)julia(?:;?)\s*(?<options>.*?)(\}|\s*)$",
r"^[`~]{3}\s*$"
header_text, document_body, offset = separate_header_text(document_body)
header = parse_header(header_text)
code_start = r"^[`~]{3}(?:\{?)julia(?:;?)\s*(?<options>.*?)(\}|\s*)$"
code_end = r"^[`~]{3}\s*$"
end
return parse_markdown(document_body, code_start, code_end)
return header, parse_markdown_body(document_body, code_start, code_end, offset)
end
function parse_markdown(document_body, code_start, code_end)::Vector{WeaveChunk}
# headers
# -------
# Matches a section delimited by two `---` lines; the text between the delimiter lines is
# captured in the named group `header`.
const HEADER_REGEX = r"^---$(?<header>((?!---).)+)^---$"ms

# TODO: non-Weave headers should be kept in the document

# Separates the header section from `text`.
# Returns `(header_text, body, offset)`: the captured header, `text` with the first header
# match removed, and the number of newline characters in the header. When `text` has no
# header, returns `("", text, 0)`.
function separate_header_text(text)
    found = match(HEADER_REGEX, text)
    if found === nothing
        return "", text, 0
    end
    header = found[:header]
    body = replace(text, HEADER_REGEX => ""; count = 1)
    offset = count("\n", header)
    return header, body, offset
end
# HACK:
# YAML.jl can't parse text including ``` characters, so all inline code in the header is
# first rewrapped in these temporary start/end marker strings.
const HEADER_INLINE_START = "<weave_header_inline_start>"
const HEADER_INLINE_END = "<weave_header_inline_end>"

# Loads `header_text` with `YAML.load` after rewrapping every `INLINE_REGEX` match in the
# temporary markers above. An empty header yields an empty `Dict()`.
function parse_header(header_text)
    if isempty(header_text)
        return Dict()
    end
    marker = SubstitutionString("$(HEADER_INLINE_START)\\1$(HEADER_INLINE_END)")
    sanitized = replace(header_text, INLINE_REGEX => marker)
    return YAML.load(sanitized)
end
# body
# ----
function parse_markdown_body(document_body, code_start, code_end, offset)
lines = split(document_body, '\n')
state = "doc"
@ -25,7 +51,7 @@ function parse_markdown(document_body, code_start, code_end)::Vector{WeaveChunk}
docno = 1
codeno = 1
content = ""
start_line = 0
start_line = offset
options = Dict()
optionString = ""
@ -57,7 +83,7 @@ function parse_markdown(document_body, code_start, code_end)::Vector{WeaveChunk}
end
content = ""
start_line = lineno
start_line = lineno + offset
continue
end
@ -66,7 +92,7 @@ function parse_markdown(document_body, code_start, code_end)::Vector{WeaveChunk}
chunk = CodeChunk(content, codeno, start_line, optionString, options)
codeno += 1
start_line = lineno
start_line = lineno + offset
content = ""
state = "doc"
push!(chunks, chunk)

View File

@ -1,9 +1,4 @@
"""
parse_notebook(document_body)::Vector{WeaveChunk}
Parses Jupyter notebook and returns [`WeaveChunk`](@ref)s.
"""
function parse_notebook(document_body)::Vector{WeaveChunk}
function parse_notebook(document_body)
nb = JSON.parse(document_body)
chunks = WeaveChunk[]
options = Dict{Symbol,Any}()
@ -25,5 +20,5 @@ function parse_notebook(document_body)::Vector{WeaveChunk}
end
end
return chunks
return Dict(), chunks
end

View File

@ -55,11 +55,8 @@ end
function parse_doc(document, informat)
document = replace(document, "\r\n" => "\n") # normalize line ending
header_text, document = separate_header_text(document)
return parse_header(header_text),
informat == "markdown" ? parse_markdown(document) :
informat == "noweb" ? parse_markdown(document, true) :
return informat == "markdown" ? parse_markdown(document) :
informat == "noweb" ? parse_markdown(document; is_pandoc = true) :
informat == "script" ? parse_script(document) :
informat == "notebook" ? parse_notebook(document) :
error("unsupported input format given: $informat")
@ -128,32 +125,6 @@ end
# Wraps the whole of `text` in a single `InlineText` element and returns it as a
# one-element `Inline` vector.
function parse_inline(text)
    whole = InlineText(text, 1, length(text), 1)
    return Inline[whole]
end
# headers
# -------
# Matches a section delimited by two `---` lines; the text between the delimiter lines is
# captured in the named group `header`.
const HEADER_REGEX = r"^---$(?<header>((?!---).)+)^---$"ms

# TODO: non-Weave headers should be kept in the document

# Separates the header section from `text`.
# Returns `(header_text, body)`: the captured header and `text` with the first header match
# removed. When `text` has no header, returns `("", text)`.
function separate_header_text(text)
    found = match(HEADER_REGEX, text)
    if found === nothing
        return "", text
    end
    body = replace(text, HEADER_REGEX => ""; count = 1)
    return found[:header], body
end
# HACK:
# YAML.jl can't parse text including ``` characters, so all inline code in the header is
# first rewrapped in these temporary start/end marker strings.
const HEADER_INLINE_START = "<weave_header_inline_start>"
const HEADER_INLINE_END = "<weave_header_inline_end>"

# Loads `header_text` with `YAML.load` after rewrapping every `INLINE_REGEX` match in the
# temporary markers above. An empty header yields an empty `Dict()`.
function parse_header(header_text)
    if isempty(header_text)
        return Dict()
    end
    marker = SubstitutionString("$(HEADER_INLINE_START)\\1$(HEADER_INLINE_END)")
    sanitized = replace(header_text, INLINE_REGEX => marker)
    return YAML.load(sanitized)
end
include("markdown.jl")
include("script.jl")
include("notebook.jl")

View File

@ -1,9 +1,4 @@
"""
parse_script(document_body)::Vector{WeaveChunk}
Parse Julia script and returns [`WeaveChunk`](@ref)s.
"""
function parse_script(document_body)::Vector{WeaveChunk}
function parse_script(document_body)
lines = split(document_body, "\n")
doc_line = r"(^#'.*)|(^#%%.*)|(^# %%.*)"
@ -20,8 +15,6 @@ function parse_script(document_body)::Vector{WeaveChunk}
optionString = ""
chunks = WeaveChunk[]
state = "code"
lineno = 1
n_emptylines = 0
for lineno = 1:length(lines)
line = lines[lineno]
@ -81,12 +74,6 @@ function parse_script(document_body)::Vector{WeaveChunk}
docno += 1
end
read *= line * "\n"
if strip(line) == ""
n_emptylines += 1
else
n_emptylines = 0
end
end
# Handle the last chunk
@ -98,5 +85,5 @@ function parse_script(document_body)::Vector{WeaveChunk}
push!(chunks, chunk)
end
return chunks
return Dict(), chunks
end

View File

@ -17,14 +17,20 @@ ms = collect(eachmatch(Weave.INLINE_REGEXES, doc))
@test ms[2][1] == "code"
@test ms[3][1] == "show(\"is\")"
chunk = Weave.parse_markdown(doc)[1]
# The first chunk of the parsed document must hold seven content elements, with the inline
# code pieces at positions 2, 4 and 6 equal to the regex captures collected in `ms`.
let
    chunk = first(last(Weave.parse_markdown(doc)))
    parts = chunk.content
    @test length(parts) == 7
    @test parts[2].content == ms[1][2]
    @test parts[4].content == ms[2][1]
    @test parts[6].content == ms[3][1]
end
chunknw = Weave.parse_markdown(doc, false)[1]
@test all([chunknw.content[i].content == chunk.content[i].content for i in 1:7])
# Parsing with the default options and with `is_pandoc = false` given explicitly must yield
# the same inline contents. The previous assertion compared `chunk` with itself, so it was
# always true and verified nothing.
let
    _, chunks = Weave.parse_markdown(doc)
    _, chunks_explicit = Weave.parse_markdown(doc; is_pandoc = false)
    chunk = first(chunks)
    chunk_explicit = first(chunks_explicit)
    @test all(chunk_explicit.content[i].content == chunk.content[i].content for i in 1:7)
end
# Test with document