From d19b374fe9a6708cfc2bb0ec8779be3c8711200b Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Thu, 8 Jan 2015 18:52:42 +0200 Subject: [PATCH] Implemented caching, added documentation and tests --- .gitignore | 1 + NEWS.md | 1 + REQUIRE | 2 +- doc/manual.md | 1 + src/Weave.jl | 7 ++- src/cache.jl | 41 +++++++++++------ src/run.jl | 75 ++++++++++++++++++++++---------- test/cache_test.jl | 37 ++++++++++++++++ test/documents/chunk_cache.noweb | 35 +++++++++++++++ test/runtests.jl | 5 ++- 10 files changed, 165 insertions(+), 40 deletions(-) create mode 100644 test/cache_test.jl create mode 100644 test/documents/chunk_cache.noweb diff --git a/.gitignore b/.gitignore index cb51d13..e3a75cc 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ examples/*.html examples/*.rst examples/*.tex test/documents/figures +test/documents/cache test/documents/output/figures test/documents/output/gadfly_formats_test.txt test/documents/*.tex diff --git a/NEWS.md b/NEWS.md index 02d3f8c..94de12d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ### Changes in master +* Simple caching of code chunks * Each document is executed in separate sandbox module instead of redefining the same one. Fixes warnings and occasional segfaults. * New chunk option: `line_width`. * Bug fix in wrapping output lines. diff --git a/REQUIRE b/REQUIRE index f93cf06..e126e43 100644 --- a/REQUIRE +++ b/REQUIRE @@ -2,4 +2,4 @@ julia 0.3 Compat ArgParse Docile -HDF5 +JSON diff --git a/doc/manual.md b/doc/manual.md index d7354fe..6eefa17 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -59,6 +59,7 @@ Weave currently supports the following chunk options with the following defaults * `label`. Chunk label, will be used for figure labels in Latex as fig:label * `wrap = true`. Wrap long lines from output. * `line_width = 75`. Line width for wrapped lines. +* `cache = false`. Cache results, depends on `cache` parameter on `weave` function. 
**Options for figures** diff --git a/src/Weave.jl b/src/Weave.jl index 07c8c67..592ff41 100644 --- a/src/Weave.jl +++ b/src/Weave.jl @@ -90,17 +90,20 @@ weave(source ; doctype = "pandoc", plotlib="Gadfly", `"somepath"`: Path as a string e.g `"/home/mpastell/weaveout"` * `fig_path`: where figures will be generated, relative to out_path * `fig_ext`: Extension for saved figures e.g. `".pdf"`, `".png"`. Default setting depends on `doctype`. +* `cache_path`: where cached output will be saved. +* `cache`: controls caching of code: `:off` = no caching, `:all` = cache everything, + `:user` = cache based on chunk options, `:refresh` = run all code chunks and save new cache. **Note:** Run Weave from terminal and not using IJulia, Juno or ESS, they tend to mess with capturing output. """ -> function weave(source ; doctype = "pandoc", plotlib="Gadfly", informat="noweb", out_path=:doc, fig_path = "figures", fig_ext = nothing, - cache_path = "cache") + cache_path = "cache", cache=:off) doc = read_doc(source, informat) #Reader toimii, muuten kesken...
doc = run(doc, doctype = doctype, plotlib=plotlib, informat = informat, out_path=out_path, - fig_path = fig_path, fig_ext = fig_ext, cache_path = cache_path) + fig_path = fig_path, fig_ext = fig_ext, cache_path = cache_path, cache=cache) formatted = format(doc) outname = "$(doc.cwd)/$(doc.basename).$(doc.format.formatdict[:extension])" diff --git a/src/cache.jl b/src/cache.jl index 9f6ff3b..be35a8c 100644 --- a/src/cache.jl +++ b/src/cache.jl @@ -1,25 +1,38 @@ -import HDF5, JLD +import JSON + function write_cache(doc::WeaveDoc, cache_path) - isdir(cache_path) || mkdir(cache_path) - name = "$cache_path/$(doc.basename).jld" - JLD.save(name, "doc", doc) - #open(name, "w") do io - # write(io, JSON.json(doc)) - #end + cache_dir = "$(doc.cwd)/$cache_path" + isdir(cache_dir) || mkpath(cache_dir) + name = "$cache_dir/$(doc.basename).json" + open(name, "w") do io + write(io, JSON.json(doc)) + end return nothing end function read_cache(doc::WeaveDoc, cache_path) - name = "$cache_path/$(doc.basename).jld" + name = "$(doc.cwd)/$cache_path/$(doc.basename).json" isfile(name) || return nothing - return JLD.load(name, "doc") - #parsed = JSON.parsefile(name) - #doc = WeaveDoc(parsed["source"], parsed["chunks"], - #parsed["cwd"], parsed["doctype"]) + parsed = JSON.parsefile(name) end +#read_cache returns a dictionary, parse it back to a chunk +function restore_chunk(chunk::CodeChunk, cached, idx) + options = Dict{Symbol, Any}() + for (keys,vals) = cached["chunks"][idx]["options"] + options[symbol(keys)] = vals + end + haskey(options, :term_state) && (options[:term_state] = symbol(options[:term_state])) + chunk.options = options + chunk.content = cached["chunks"][idx]["content"] + chunk.output = cached["chunks"][idx]["output"] + chunk.figures = cached["chunks"][idx]["figures"] -#Todo caching of data, can get the contents of module using: -#names(ReportSandBox, all=true) + return chunk +end + +function restore_chunk(chunk::DocChunk, cached, idx) + chunk +end diff --git a/src/run.jl
b/src/run.jl index ed69fa9..4dff12c 100644 --- a/src/run.jl +++ b/src/run.jl @@ -1,10 +1,29 @@ +@doc """ +Run code chunks and capture output from parsed document. - - +```julia function run(doc::WeaveDoc; doctype = "pandoc", plotlib="Gadfly", informat="noweb", + out_path=:doc, fig_path = "figures", fig_ext = nothing, + cache_path = "cache", cache = :off) +``` + +* `doctype`: see `list_out_formats()` +* `plotlib`: `"PyPlot"`, `"Gadfly"`, or `"Winston"` +* `informat`: `"noweb"` or `"markdown"` +* `out_path`: Path where the output is generated. Can be: `:doc`: Path of the source document, `:pwd`: Julia working directory, +`"somepath"`: Path as a string e.g `"/home/mpastell/weaveout"` +* `fig_path`: where figures will be generated, relative to out_path +* `fig_ext`: Extension for saved figures e.g. `".pdf"`, `".png"`. Default setting depends on `doctype`. +* `cache_path`: where cached output will be saved. +* `cache`: controls caching of code: `:off` = no caching, `:all` = cache everything, + `:user` = cache based on chunk options, `:refresh` = run all code chunks and save new cache. + +**Note:** Run command from terminal and not using IJulia, Juno or ESS, they tend to mess with capturing output.
+""" -> +function Base.run(doc::WeaveDoc; doctype = "pandoc", plotlib="Gadfly", informat="noweb", out_path=:doc, fig_path = "figures", fig_ext = nothing, cache_path = "cache", cache = :off) - #cache :all, :user, :off + #cache :all, :user, :off, :refresh doc.cwd = get_cwd(doc, out_path) @@ -23,7 +42,7 @@ function run(doc::WeaveDoc; doctype = "pandoc", plotlib="Gadfly", informat="nowe report = Report(doc.cwd, doc.basename, doc.format.formatdict) pushdisplay(report) - if cache != :off + if cache != :off && cache != :refresh cached = read_cache(doc, cache_path) cached == nothing && info("No cached results found, running code") else @@ -35,11 +54,7 @@ function run(doc::WeaveDoc; doctype = "pandoc", plotlib="Gadfly", informat="nowe for i = 1:n chunk = doc.chunks[i] - if cached != nothing && (cache == :all || (cache ==:user && chunk.options.cache)) - result_chunk = cached.chunks[i] - else - result_chunk = eval_chunk(chunk, report, SandBox) - end + result_chunk = run_chunk(chunk, report, SandBox, cached, cache, i) push!(executed, result_chunk) end @@ -55,6 +70,31 @@ function run(doc::WeaveDoc; doctype = "pandoc", plotlib="Gadfly", informat="nowe return doc end +function run_chunk(chunk::CodeChunk, report::Report, SandBox::Module, cached, cache, idx) + defaults = copy(rcParams[:chunk_defaults]) + options = copy(chunk.options) + try + options = merge(rcParams[:chunk_defaults], options) + catch + options = rcParams[:chunk_defaults] + warn("Invalid format for chunk options line: $(chunk.start_line)") + end + + merge!(chunk.options, options) + + delete!(chunk.options, :options) + + if cached != nothing && (cache == :all || (cache ==:user && chunk.options[:cache])) + result_chunk = restore_chunk(chunk, cached, idx) + else + result_chunk = eval_chunk(chunk, report, SandBox) + end +end + +function run_chunk(chunk::DocChunk, report::Report, SandBox::Module, cached, cache, idx) + return chunk +end + function run_block(code_str, report::Report, SandBox::Module) oldSTDOUT = STDOUT
result = "" @@ -113,18 +153,6 @@ end function eval_chunk(chunk::CodeChunk, report::Report, SandBox::Module) info("Weaving chunk $(chunk.number) from line $(chunk.start_line)") - defaults = copy(rcParams[:chunk_defaults]) - options = copy(chunk.options) - try - options = merge(rcParams[:chunk_defaults], options) - catch - options = rcParams[:chunk_defaults] - warn("Invalid format for chunk options line: $(chunk.start_line)") - end - - merge!(chunk.options, options) - #delete!(chunk.options, :options) - #@show chunk.options if !chunk.options[:eval] chunk.output = "" @@ -132,11 +160,13 @@ function eval_chunk(chunk::CodeChunk, report::Report, SandBox::Module) return chunk end + report.fignum = 1 report.cur_result = "" report.figures = String[] report.cur_chunk = chunk report.term_state = :text + if haskey(report.formatdict, :out_width) && chunk.options[:out_width] == nothing chunk.options[:out_width] = report.formatdict[:out_width] end @@ -162,6 +192,7 @@ end + #Set all variables to nothing function clear_sandbox(SandBox::Module) for name = names(SandBox, true) @@ -174,7 +205,7 @@ end function get_figname(report::Report, chunk; fignum = nothing) figpath = joinpath(report.cwd, chunk.options[:fig_path]) - isdir(figpath) || mkdir(figpath) + isdir(figpath) || mkpath(figpath) ext = chunk.options[:fig_ext] fignum == nothing && (fignum = report.fignum) diff --git a/test/cache_test.jl b/test/cache_test.jl new file mode 100644 index 0000000..6971a01 --- /dev/null +++ b/test/cache_test.jl @@ -0,0 +1,37 @@ +using Weave +using Base.Test + +#Test if running document with and without cache works +isdir("documents/cache") && rm("documents/cache", recursive = true) +weave("documents/chunk_options.noweb", plotlib=nothing, cache=:all) +ctime1 = ctime("documents/chunk_options.md") +result = readall(open("documents/chunk_options.md")) +weave("documents/chunk_options.noweb", plotlib=nothing, cache=:all) +ctime2 = ctime("documents/chunk_options.md") +cached_result = 
readall(open("documents/chunk_options.md")) +@test result == cached_result +@test ctime1 != ctime2 + +# cache = :user +isdir("documents/cache") && rm("documents/cache", recursive = true) +out = "documents/chunk_cache.md" +Weave.weave("documents/chunk_cache.noweb", plotlib=nothing, cache=:user); +result = readall(open(out)) +ctime1 = ctime(out) +Weave.weave("documents/chunk_cache.noweb", plotlib=nothing, cache=:user); +cached_result = readall(open(out)) +ctime2 = ctime(out) +@test result == cached_result +@test ctime1 != ctime2 + +using Gadfly +isdir("documents/cache") && rm("documents/cache", recursive = true) +#Caching with Gadfly +weave("documents/gadfly_formats_test.txt", doctype="tex", plotlib="gadfly", cache=:all) +result = readall(open("documents/gadfly_formats_test.tex")) +ctime1 = ctime("documents/gadfly_formats_test.tex") +weave("documents/gadfly_formats_test.txt", doctype="tex", plotlib="gadfly", cache=:all) +ctime2 = ctime("documents/gadfly_formats_test.tex") +cached_result = readall(open("documents/gadfly_formats_test.tex")) +@test result == cached_result +@test ctime1 != ctime2 diff --git a/test/documents/chunk_cache.noweb b/test/documents/chunk_cache.noweb new file mode 100644 index 0000000..1eaed4b --- /dev/null +++ b/test/documents/chunk_cache.noweb @@ -0,0 +1,35 @@ + +<>= +y= [2, 5, 12] +@ + + +<>= +y= [2, 5, 12] +@ + + +<<>>= +x = [12, 10] +println(y) +println(x) +@ + +<>= +x = [12, 10] +println(y) +println(x) +@ + +<<>>= +y = 1:5 +println(y) +@ + + +<>= +y = 1:5 +println(y) +@ + +Some text in the end diff --git a/test/runtests.jl b/test/runtests.jl index f7c1579..8eee229 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,10 @@ using Base.Test info("Test: Chunk options") include("chunk_options.jl") -info("Testing: Weaving with Winston") +info("Test: Caching") +include("cache_test.jl") + +info("Test: Weaving with Winston") include("winston_formats.jl") info("Test: Weaving with Gadfly")