# Weave.jl/src/reader/markdown.jl
"""
    parse_markdown(document_body; is_pandoc = false)

Parse a markdown document into its header and its chunks.

The header section is first split off with `separate_header_text` and parsed with
`parse_header`; the remaining text is handed to `parse_markdown_body` together with the
regexes that recognise the start and end of a code chunk.

# Keywords
- `is_pandoc`: when `true`, code chunks are delimited by a `<<options>>=` line and a
  closing `@` line; otherwise by a fence of three backticks or tildes followed by
  `julia` (with optional options) and a closing bare fence.

# Returns
A tuple `(header, chunks)` where `header` is the result of `parse_header` and `chunks`
is the result of `parse_markdown_body`.
"""
function parse_markdown(document_body; is_pandoc = false)
    header_text, document_body, offset = separate_header_text(document_body)
    header = parse_header(header_text)
    # both patterns expose the chunk options through the named group `options`
    code_start, code_end = if is_pandoc
        r"^<<(?<options>.*?)>>=\s*$",
        r"^@\s*$"
    else
        r"^[`~]{3}(\{?)julia\s*([;,\{]?)\s*(?<options>.*?)(\}|\s*)$",
        r"^[`~]{3}\s*$"
    end
    return header, parse_markdown_body(document_body, code_start, code_end, offset)
end
# headers
# -------
# Matches a block delimited by two lines consisting solely of `---`; the text between
# the delimiters is captured in the named group `header`.
const HEADER_REGEX = r"^---$(?<header>((?!---).)+)^---$"ms
# TODO: non-Weave headers should be kept in the document

"""
    separate_header_text(text)

Separate the header section from `text`.

# Returns
A tuple `(header_text, body, offset)`:
- `header_text`: the text captured between the `---` delimiter lines, or `""` when
  `text` contains no match for `HEADER_REGEX`;
- `body`: `text` with the first header match removed (or `text` itself when there is
  no header);
- `offset`: the number of newline characters in `header_text` (`0` when there is no
  header).
"""
function separate_header_text(text)
    m = match(HEADER_REGEX, text)
    isnothing(m) && return "", text, 0
    header_text = m[:header]
    # both branches count newlines; only the call form differs between Julia versions
    offset = @static if VERSION >= v"1.4"
        count("\n", header_text)
    else
        count(c -> c === '\n', header_text)
    end
    return header_text, replace(text, HEADER_REGEX => ""; count = 1), offset
end
# HACK:
# YAML.jl can't parse text that includes ``` characters, so inline code in the header is
# first wrapped in these temporary start/end marker strings before the text is parsed.
const HEADER_INLINE_START = "<weave_header_inline_start>"
const HEADER_INLINE_END = "<weave_header_inline_end>"

"""
    parse_header(header_text)

Parse the header section `header_text` with `YAML.load`.

Before parsing, every match of `INLINE_REGEX` (defined elsewhere in the package) is
replaced by its first capture group wrapped in `HEADER_INLINE_START` and
`HEADER_INLINE_END`.

# Returns
The value produced by `YAML.load`, or an empty `Dict()` when `header_text` is empty or
whitespace-only, or when the YAML document yields `nothing`.
"""
function parse_header(header_text)
    # a blank header carries no settings; skip YAML entirely so callers always get a Dict
    isempty(strip(header_text)) && return Dict()
    marked = SubstitutionString("$(HEADER_INLINE_START)\\1$(HEADER_INLINE_END)")
    header_text = replace(header_text, INLINE_REGEX => marked)
    header = YAML.load(header_text)
    # an empty YAML document (e.g. comments only) loads as `nothing`
    return isnothing(header) ? Dict() : header
end
# body
# ----
"""
    parse_markdown_body(document_body, code_start, code_end, offset)

Split `document_body` into a vector of chunks by scanning it line by line.

A line matching `code_start` while in doc state opens a code chunk: its `options`
capture is stripped and passed to `parse_options`, a `:label` option is copied to
`:name`, and `:name` defaults to `nothing`. The text collected so far is stored as a
`DocChunk` unless it is blank. A line matching `code_end` while in code state closes
the chunk and stores the collected text as a `CodeChunk`. Delimiter lines themselves
are not part of any chunk's text.

Each chunk is created with a running number (counted separately for doc and code
chunks) and a start line: `offset` for the first chunk, otherwise the offset-adjusted
line number of the delimiter line that precedes it.

Whatever text remains after the last line is stored as a `DocChunk` unless it is blank.
This also applies to a code chunk that is never closed.

# Returns
The `WeaveChunk[]` vector of collected chunks, in document order.
"""
function parse_markdown_body(document_body, code_start, code_end, offset)
    lines = split(document_body, '\n')

    state = :doc
    doc_no = 0
    code_no = 0
    # chunk text is collected in a buffer instead of by repeated string concatenation
    buffer = IOBuffer()
    start_line = offset
    options = OptionDict()
    option_string = ""
    chunks = WeaveChunk[]

    for (line_no, line) in enumerate(lines)
        m = match(code_start, line)
        if !isnothing(m) && state === :doc
            state = :code
            option_string = isnothing(m[:options]) ? "" : strip(m[:options])
            options = parse_options(option_string)
            haskey(options, :label) && (options[:name] = options[:label])
            haskey(options, :name) || (options[:name] = nothing)

            # flush the preceding documentation text; `take!` also empties the buffer
            doc_text = String(take!(buffer))
            if !isempty(strip(doc_text))
                push!(chunks, DocChunk(doc_text, doc_no += 1, start_line))
            end
            start_line = line_no + offset
            continue
        end

        if occursin(code_end, line) && state === :code
            code_text = String(take!(buffer))
            chunk = CodeChunk(code_text, code_no += 1, start_line, option_string, options)
            push!(chunks, chunk)
            start_line = line_no + offset
            state = :doc
            continue
        end

        # every line except the very first of the document is prefixed with a newline,
        # so text that follows a delimiter line starts with '\n'
        isone(line_no) || print(buffer, '\n')
        print(buffer, line)
    end

    # Remember the last chunk
    rest = String(take!(buffer))
    isempty(strip(rest)) || push!(chunks, DocChunk(rest, doc_no += 1, start_line))
    return chunks
end