2016-12-14 20:50:29 +01:00
|
|
|
import JSON, YAML
|
2016-12-11 19:23:19 +01:00
|
|
|
|
2014-12-01 23:58:12 +01:00
|
|
|
"""
    pushopt(options::Dict, expr::Expr)

Store a single `key = value` expression in `options`.

If `expr` is an assignment expression, its left-hand side is used as the key and its
right-hand side as the value, and the value is returned. Any other expression leaves
`options` untouched and `false` is returned.
"""
function pushopt(options::Dict, expr::Expr)
    Base.Meta.isexpr(expr, :(=)) || return false
    key = expr.args[1]
    value = expr.args[2]
    options[key] = value
    return value
end
|
|
|
|
|
2017-12-31 13:02:28 +01:00
|
|
|
"""
    MarkupInput(codestart, codeend, inline)

Regular expressions describing a markup-style input format.

# Fields
- `codestart`: matched against a line to detect the start of a code chunk; its first
  capture group is read as the chunk option string.
- `codeend`: matched against a line to detect the end of a code chunk.
- `inline`: regular expression handed to `DocChunk` together with the text of
  documentation chunks.
"""
mutable struct MarkupInput
    codestart::Regex
    codeend::Regex
    inline::Regex
end
|
|
|
|
|
2017-12-31 13:02:28 +01:00
|
|
|
"""
    ScriptInput(doc_line, doc_start, opt_line, opt_start, inline)

Regular expressions describing a script-style input format.

# Fields
- `doc_line`: matches a whole line that belongs to documentation.
- `doc_start`: matches the marker that is removed from the start of a documentation line.
- `opt_line`: matches a whole line that carries chunk options.
- `opt_start`: matches the marker that is removed from an option line to obtain the
  option string.
- `inline`: regular expression handed to `DocChunk` together with the text of
  documentation chunks.
"""
mutable struct ScriptInput
    doc_line::Regex
    doc_start::Regex
    opt_line::Regex
    opt_start::Regex
    inline::Regex
end
|
2014-12-05 23:17:54 +01:00
|
|
|
|
2017-12-31 13:02:28 +01:00
|
|
|
"""
    NotebookInput(inline)

Marker type selecting the notebook parser.

# Fields
- `inline`: untyped; the format table in this file constructs it with `nothing`
  (inline code is not parsed from notebooks).
"""
mutable struct NotebookInput
    inline
end
|
|
|
|
|
2016-04-21 17:51:06 +02:00
|
|
|
# Table of the supported input formats, keyed by the format names returned by
# `detect_informat` ("noweb", "markdown", "script", "notebook"). The string-keyed
# `parse_doc` method looks the format description up here.
#
# The markup and script formats all use the same inline-code pattern, so it is
# written once and shared.
const input_formats = let inline = r"`j\s+(.*?)`"s
    Dict{AbstractString,Any}(
        "noweb" => MarkupInput(
            r"^<<(.*?)>>=\s*$",
            r"^@\s*$",
            inline
        ),
        "markdown" => MarkupInput(
            r"^[`~]{3,}(?:\{|\{\.|)julia(?:;|)\s*(.*?)(\}|\s*)$",
            r"^[`~]{3,}\s*$",
            inline
        ),
        "script" => ScriptInput(
            r"(^#'.*)|(^#%%.*)|(^# %%.*)",
            r"(^#')|(^#%%)|(^# %%)",
            r"(^#\+.*$)|(^#%%\+.*$)|(^# %%\+.*$)",
            r"(^#\+)|(^#%%\+)|(^# %%\+)",
            inline
        ),
        # Don't parse inline code from notebooks
        "notebook" => NotebookInput(nothing)
    )
end
|
|
|
|
|
2016-04-22 14:39:25 +02:00
|
|
|
"""Detect the input format based on file extension"""
|
|
|
|
function detect_informat(source::AbstractString)
|
|
|
|
ext = lowercase(splitext(source)[2])
|
|
|
|
|
|
|
|
ext == ".jl" && return "script"
|
|
|
|
ext == ".jmd" && return "markdown"
|
2016-12-11 19:23:19 +01:00
|
|
|
ext == ".ipynb" && return "notebook"
|
2016-04-22 14:39:25 +02:00
|
|
|
return "noweb"
|
|
|
|
end
|
|
|
|
|
2016-04-19 14:23:48 +02:00
|
|
|
"""Read and parse input document"""
|
2016-04-22 14:39:25 +02:00
|
|
|
function read_doc(source::AbstractString, format=:auto)
|
|
|
|
format == :auto && (format = detect_informat(source))
|
2018-07-23 12:37:25 +02:00
|
|
|
document = read(source, String)
|
2018-07-23 19:29:07 +02:00
|
|
|
document = replace(document, "\r\n" => "\n")
|
2015-01-06 23:01:25 +01:00
|
|
|
parsed = parse_doc(document, format)
|
2016-12-14 20:50:29 +01:00
|
|
|
header = parse_header(parsed[1])
|
|
|
|
doc = WeaveDoc(source, parsed, header)
|
2019-03-05 18:36:27 +01:00
|
|
|
haskey(header, "options") && header_chunk_defaults!(doc)
|
2016-12-14 20:50:29 +01:00
|
|
|
return doc
|
|
|
|
end
|
|
|
|
|
|
|
|
"""
    parse_header(chunk::CodeChunk)

Return an empty header: a code chunk is never read as a document header.
"""
function parse_header(chunk::CodeChunk)
    return Dict()
end
|
|
|
|
|
|
|
|
"""
    parse_header(chunk::DocChunk)

Extract a YAML header from a documentation chunk.

The text of the first inline element of `chunk` is searched for a block delimited by
two lines that consist of `---` only. The text between the delimiters is loaded with
`YAML.load` and returned. An empty `Dict` is returned when no such block exists.
"""
function parse_header(chunk::DocChunk)
    # The header body is matched lazily so that it ends at the FIRST closing `---`
    # line; a greedy match would run on to the last `---` line of the chunk (for
    # example a later horizontal rule) and feed document text to the YAML parser.
    m = match(r"^---$(?<header>.+?)^---$"ms, chunk.content[1].content)
    m === nothing && return Dict()

    header = YAML.load(string(m[:header]))
    # NOTE(review): YAML.load is expected to give `nothing` for a header block without
    # any content; an empty header is returned in that case so that callers can keep
    # treating the result as a dictionary.
    return header === nothing ? Dict() : header
end
|
|
|
|
|
2016-04-11 17:40:18 +02:00
|
|
|
"""
    parse_doc(document::AbstractString, format="noweb")

Parse `document` using the input format registered under the key `format` in
`input_formats`.

A key that is not present in `input_formats` raises the `KeyError` of the dictionary
lookup.
"""
function parse_doc(document::AbstractString, format="noweb")
    # `format` is intentionally left untyped: the methods that take a format
    # description object are more specific and are selected first, everything else
    # is treated as a key of `input_formats`.
    return parse_doc(document, input_formats[format])
end
|
|
|
|
|
2016-04-21 18:02:08 +02:00
|
|
|
"""Parse documents with Weave.jl markup"""
|
2016-04-21 17:51:06 +02:00
|
|
|
function parse_doc(document::AbstractString, format::MarkupInput)
|
2018-07-23 19:29:07 +02:00
|
|
|
document = replace(document, "\r\n" => "\n")
|
2015-01-05 23:31:24 +01:00
|
|
|
lines = split(document, "\n")
|
2014-12-03 22:34:29 +01:00
|
|
|
|
2016-04-21 17:51:06 +02:00
|
|
|
codestart = format.codestart
|
|
|
|
codeend = format.codeend
|
2014-11-26 22:29:25 +01:00
|
|
|
state = "doc"
|
|
|
|
|
|
|
|
docno = 1
|
|
|
|
codeno = 1
|
|
|
|
content = ""
|
|
|
|
start_line = 0
|
|
|
|
|
|
|
|
options = Dict()
|
2016-04-20 20:31:55 +02:00
|
|
|
optionString = ""
|
2015-01-04 14:18:17 +01:00
|
|
|
parsed = Any[]
|
2014-12-01 22:16:51 +01:00
|
|
|
for lineno in 1:length(lines)
|
|
|
|
line = lines[lineno]
|
2014-12-01 23:58:12 +01:00
|
|
|
if (m = match(codestart, line)) != nothing && state=="doc"
|
2014-11-26 22:29:25 +01:00
|
|
|
state = "code"
|
2014-12-06 15:40:50 +01:00
|
|
|
if m.captures[1] == nothing
|
2016-04-20 20:31:55 +02:00
|
|
|
optionString = ""
|
2014-12-06 15:40:50 +01:00
|
|
|
else
|
2016-04-20 20:31:55 +02:00
|
|
|
optionString=strip(m.captures[1])
|
2014-12-06 15:40:50 +01:00
|
|
|
end
|
2016-12-19 17:42:00 +01:00
|
|
|
|
2014-12-01 23:58:12 +01:00
|
|
|
options = Dict{Symbol,Any}()
|
2016-04-20 20:31:55 +02:00
|
|
|
if length(optionString) > 0
|
2018-07-23 12:37:25 +02:00
|
|
|
expr = Meta.parse(optionString)
|
2014-12-01 23:58:12 +01:00
|
|
|
Base.Meta.isexpr(expr,:(=)) && (options[expr.args[1]] = expr.args[2])
|
|
|
|
Base.Meta.isexpr(expr,:toplevel) && map(pushopt,fill(options,length(expr.args)),expr.args)
|
2014-11-26 22:29:25 +01:00
|
|
|
end
|
2014-12-01 23:58:12 +01:00
|
|
|
haskey(options, :label) && (options[:name] = options[:label])
|
|
|
|
haskey(options, :name) || (options[:name] = nothing)
|
2016-12-19 17:42:00 +01:00
|
|
|
|
|
|
|
if !isempty(strip(content))
|
2016-12-26 19:06:03 +01:00
|
|
|
chunk = DocChunk(content, docno, start_line, format.inline)
|
2016-12-19 17:42:00 +01:00
|
|
|
docno += 1
|
|
|
|
push!(parsed, chunk)
|
|
|
|
end
|
|
|
|
|
|
|
|
content = ""
|
2014-11-26 22:29:25 +01:00
|
|
|
start_line = lineno
|
2016-12-19 17:42:00 +01:00
|
|
|
|
2014-11-26 22:29:25 +01:00
|
|
|
continue
|
2016-12-19 17:42:00 +01:00
|
|
|
|
2014-11-26 22:29:25 +01:00
|
|
|
end
|
2018-07-23 12:37:25 +02:00
|
|
|
if occursin(codeend, line) && state=="code"
|
2015-01-04 14:18:17 +01:00
|
|
|
|
2016-04-20 20:31:55 +02:00
|
|
|
chunk = CodeChunk(content, codeno, start_line, optionString, options)
|
2015-01-04 14:18:17 +01:00
|
|
|
|
2014-11-26 22:29:25 +01:00
|
|
|
codeno+=1
|
|
|
|
start_line = lineno
|
|
|
|
content = ""
|
|
|
|
state = "doc"
|
|
|
|
push!(parsed, chunk)
|
|
|
|
continue
|
|
|
|
end
|
|
|
|
|
2016-04-20 15:32:22 +02:00
|
|
|
if lineno == 1
|
2017-03-13 14:05:07 +01:00
|
|
|
content *= line
|
2016-04-20 15:32:22 +02:00
|
|
|
else
|
|
|
|
content *= "\n" * line
|
|
|
|
end
|
2014-11-26 22:29:25 +01:00
|
|
|
end
|
|
|
|
|
|
|
|
#Remember the last chunk
|
2015-01-04 14:18:17 +01:00
|
|
|
if strip(content) != ""
|
2016-12-26 19:06:03 +01:00
|
|
|
chunk = DocChunk(content, docno, start_line, format.inline)
|
2016-11-03 09:41:29 +01:00
|
|
|
#chunk = Dict{Symbol,Any}(:type => "doc", :content => content,
|
2015-01-04 14:18:17 +01:00
|
|
|
# :number => docno, :start_line => start_line)
|
2014-11-26 22:29:25 +01:00
|
|
|
push!(parsed, chunk)
|
|
|
|
end
|
|
|
|
return parsed
|
2014-12-01 22:06:57 +01:00
|
|
|
end
|
2016-04-21 18:02:08 +02:00
|
|
|
|
|
|
|
"""Parse .jl scripts with Weave.jl markup"""
|
|
|
|
function parse_doc(document::AbstractString, format::ScriptInput)
|
2018-07-23 19:35:15 +02:00
|
|
|
document = replace(document, "\r\n" => "\n")
|
2016-04-21 18:02:08 +02:00
|
|
|
lines = split(document, "\n")
|
|
|
|
|
|
|
|
doc_line = format.doc_line
|
|
|
|
doc_start = format.doc_start
|
|
|
|
opt_line = format.opt_line
|
|
|
|
opt_start = format.opt_start
|
|
|
|
|
|
|
|
read = ""
|
|
|
|
chunks = []
|
|
|
|
docno = 1
|
|
|
|
codeno = 1
|
|
|
|
content = ""
|
|
|
|
start_line = 1
|
|
|
|
options = Dict{Symbol,Any}()
|
|
|
|
optionString = ""
|
|
|
|
parsed = Any[]
|
|
|
|
state = "code"
|
|
|
|
lineno = 1
|
|
|
|
n_emptylines = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for lineno in 1:length(lines)
|
|
|
|
line = lines[lineno]
|
|
|
|
if (m = match(doc_line, line)) != nothing && (m = match(opt_line, line)) == nothing
|
2018-07-23 19:35:15 +02:00
|
|
|
line = replace(line, doc_start => "", count=1)
|
2016-04-21 18:02:08 +02:00
|
|
|
if startswith(line, " ")
|
2018-07-23 19:35:15 +02:00
|
|
|
line = replace(line, " " => "", count=1)
|
2016-04-21 18:02:08 +02:00
|
|
|
end
|
|
|
|
if state == "code" && strip(read) != ""
|
2016-12-16 19:18:37 +01:00
|
|
|
chunk = CodeChunk("\n" * strip(read), codeno, start_line, optionString, options)
|
2016-04-21 18:02:08 +02:00
|
|
|
push!(parsed, chunk)
|
|
|
|
codeno +=1
|
|
|
|
read = ""
|
|
|
|
start_line = lineno
|
|
|
|
end
|
|
|
|
state = "doc"
|
|
|
|
elseif (m = match(opt_line, line)) != nothing
|
|
|
|
start_line = lineno
|
|
|
|
if state == "code" && strip(read) !=""
|
2016-12-16 19:18:37 +01:00
|
|
|
chunk = CodeChunk("\n" * strip(read), codeno, start_line, optionString, options)
|
2016-04-21 18:02:08 +02:00
|
|
|
push!(parsed, chunk)
|
|
|
|
read = ""
|
|
|
|
codeno +=1
|
|
|
|
end
|
|
|
|
if state == "doc" && strip(read) != ""
|
|
|
|
(docno > 1) && (read = "\n" * read) # Add whitespace to doc chunk. Needed for markdown output
|
|
|
|
chunk = DocChunk(read, docno, start_line)
|
|
|
|
push!(parsed, chunk)
|
|
|
|
read = ""
|
|
|
|
docno += 1
|
|
|
|
end
|
|
|
|
|
2018-07-23 19:35:15 +02:00
|
|
|
optionString = replace(line, opt_start => "", count=1)
|
2016-04-21 18:02:08 +02:00
|
|
|
#Get options
|
|
|
|
options = Dict{Symbol,Any}()
|
|
|
|
if length(optionString) > 0
|
2018-07-23 12:37:25 +02:00
|
|
|
expr = Meta.parse(optionString)
|
2016-04-21 18:02:08 +02:00
|
|
|
Base.Meta.isexpr(expr,:(=)) && (options[expr.args[1]] = expr.args[2])
|
|
|
|
Base.Meta.isexpr(expr,:toplevel) && map(pushopt,fill(options,length(expr.args)),expr.args)
|
|
|
|
end
|
|
|
|
haskey(options, :label) && (options[:name] = options[:label])
|
|
|
|
haskey(options, :name) || (options[:name] = nothing)
|
|
|
|
|
|
|
|
state = "code"
|
|
|
|
continue
|
2018-01-02 14:48:56 +01:00
|
|
|
elseif state == "doc" #&& strip(line) != "" && strip(read) != ""
|
2016-04-21 18:02:08 +02:00
|
|
|
state = "code"
|
|
|
|
(docno > 1) && (read = "\n" * read) # Add whitespace to doc chunk. Needed for markdown output
|
2016-12-26 19:06:03 +01:00
|
|
|
chunk = DocChunk(read, docno, start_line, format.inline)
|
2016-04-21 18:02:08 +02:00
|
|
|
push!(parsed, chunk)
|
|
|
|
options = Dict{Symbol,Any}()
|
|
|
|
start_line = lineno
|
|
|
|
read = ""
|
|
|
|
docno += 1
|
|
|
|
end
|
|
|
|
read *= line * "\n"
|
|
|
|
|
|
|
|
if strip(line) == ""
|
|
|
|
n_emptylines += 1
|
|
|
|
else
|
|
|
|
n_emptylines = 0
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Handle the last chunk
|
|
|
|
if state == "code"
|
2016-12-16 19:18:37 +01:00
|
|
|
chunk = CodeChunk("\n" * strip(read), codeno, start_line, optionString, options)
|
2016-04-21 18:02:08 +02:00
|
|
|
push!(parsed, chunk)
|
|
|
|
else
|
2016-12-26 19:06:03 +01:00
|
|
|
chunk = DocChunk(read, docno, start_line, format.inline)
|
2016-04-21 18:02:08 +02:00
|
|
|
push!(parsed, chunk)
|
|
|
|
end
|
|
|
|
|
|
|
|
return parsed
|
|
|
|
end
|
2016-12-11 19:23:19 +01:00
|
|
|
|
|
|
|
"""Parse IJUlia notebook"""
|
|
|
|
function parse_doc(document::String, format::NotebookInput)
|
2018-07-23 19:29:07 +02:00
|
|
|
document = replace(document, "\r\n" => "\n")
|
2016-12-11 19:23:19 +01:00
|
|
|
nb = JSON.parse(document)
|
|
|
|
parsed = Any[]
|
|
|
|
options = Dict{Symbol,Any}()
|
|
|
|
opt_string = ""
|
|
|
|
docno = 1
|
|
|
|
codeno = 1
|
|
|
|
|
|
|
|
for cell in nb["cells"]
|
|
|
|
srctext = "\n" * join(cell["source"], "")
|
|
|
|
|
|
|
|
if cell["cell_type"] == "code"
|
|
|
|
chunk = CodeChunk(rstrip(srctext), codeno, 0, opt_string, options)
|
|
|
|
push!(parsed, chunk)
|
|
|
|
codeno += 1
|
|
|
|
else
|
|
|
|
chunk = DocChunk(srctext * "\n", docno, 0)
|
|
|
|
push!(parsed, chunk)
|
|
|
|
docno +=1
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
return parsed
|
|
|
|
end
|
2016-12-26 19:06:03 +01:00
|
|
|
|
|
|
|
"""
    parse_inline(text, noex)

Fallback used when no inline-code regex is defined: the second argument is ignored and
the whole `text` is returned as a single `InlineText` element numbered `1`, with start
position `1` and end position `length(text)`.
"""
function parse_inline(text, noex)
    whole = InlineText(text, 1, length(text), 1)
    return Inline[whole]
end
|
|
|
|
|
|
|
|
"""
    parse_inline(text::AbstractString, inline_ex::Regex)

Split `text` into alternating plain-text and inline-code elements.

Every match of `inline_ex` becomes an `InlineCode` built from the first capture group
of the match; the text before each match and the text after the last match become
`InlineText` elements. Text and code elements are numbered independently, starting at
`1`. When `inline_ex` does not occur in `text`, the whole text is returned as a single
`InlineText`.

Returns a `Vector{Inline}`.
"""
function parse_inline(text::AbstractString, inline_ex::Regex)
    occursin(inline_ex, text) || return Inline[InlineText(text, 1, length(text), 1)]

    res = Inline[]
    textno = 1
    codeno = 1
    e = 1   # index of the first code unit that has not been consumed yet

    for ic in eachmatch(inline_ex, text)
        s = ic.offset
        # `s - 1` is not a valid string index when the character in front of the match
        # is encoded with several code units; `prevind` gives the index of that
        # character, so the slice cannot fail on non-ASCII text.
        push!(res, InlineText(text[e:prevind(text, s)], e, s - 1, textno))
        textno += 1

        # First index after the match, independent of the width of its last character.
        e = s + ncodeunits(ic.match)
        push!(res, InlineCode(ic.captures[1], s, e, codeno))
        codeno += 1
    end

    # NOTE(review): the recorded end position uses `length(text)` (a character count)
    # while the start positions are string indices; the two differ for non-ASCII text.
    # Left unchanged because the users of these positions are not visible here.
    push!(res, InlineText(text[e:end], e, length(text), textno))

    return res
end
|