From 12866bc20ec641da83854948f39a560dc5e83a23 Mon Sep 17 00:00:00 2001 From: nico202 Date: Fri, 4 May 2018 08:23:06 +0200 Subject: [PATCH] First almost working version. Never tried REQUIRE, tests are missing --- REQUIRE | 6 ++ bugs.org | 12 +++ src/Scjrm.jl | 17 +++- src/bibliography.jl | 104 ++++++++++++++++++++ src/configs.jl | 5 + src/default.jl | 71 ++++++++++++++ src/globals.jl | 10 ++ src/macros.jl | 17 ++++ src/server.jl | 45 +++++++++ src/zotero.jl | 232 ++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 10 +- 11 files changed, 525 insertions(+), 4 deletions(-) create mode 100644 bugs.org create mode 100644 src/bibliography.jl create mode 100644 src/configs.jl create mode 100644 src/default.jl create mode 100644 src/globals.jl create mode 100644 src/macros.jl create mode 100644 src/server.jl create mode 100644 src/zotero.jl diff --git a/REQUIRE b/REQUIRE index 137767a..6b640d7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1 +1,7 @@ julia 0.6 +HTTP +JSON +HttpCommon +URIParser +# BibTeX +# BibTeXFormat \ No newline at end of file diff --git a/bugs.org b/bugs.org new file mode 100644 index 0000000..1c06847 --- /dev/null +++ b/bugs.org @@ -0,0 +1,12 @@ +* List of bugs seen that must be fixed +** TODO Date year saved as 1996/09 +What to do? +- Should it understand automatically it is a month? +- Just filter it out? +** Similar ids +Those three are the same: +- srinivasan12t_haptics_1997 +- srinivasan_haptics_1997 +- srinivasanl’zl_haptics_1997 + +On which field should we check the distance? Which threshold? How to merge fields? 
diff --git a/src/Scjrm.jl b/src/Scjrm.jl index 7824af1..450fc1a 100644 --- a/src/Scjrm.jl +++ b/src/Scjrm.jl @@ -1,5 +1,20 @@ +__precompile__() + module Scjrm -# package code goes here +using BibTeX + +const hooks = Dict() +const libraries = Dict() +currentlibrary = "" +const bibliography = Bibliography("") + +include("default.jl") +include("server.jl") +include("macros.jl") +include("globals.jl") +include("configs.jl") +include("zotero.jl") +include("bibliography.jl") end # module diff --git a/src/bibliography.jl b/src/bibliography.jl new file mode 100644 index 0000000..9908254 --- /dev/null +++ b/src/bibliography.jl @@ -0,0 +1,104 @@ +#= +Functions to manage the library +=# +using JSON, StringDistances + +""" + Add `entry` to `bibliography` on key `id` + +Defines `pre_add_to_bib` and `after_add_to_bib`. +""" +function addtobibliography!(bibliography::Bibliography, id::String, entry::Citation) + @hook :pre_add_to_bib + info("Here") + exists = id in keys(bibliography) + info("The id you are adding $(id) does " * (exists ? "" : "not ") * "exists") + if exists + oldentry = bibliography[id] + # TODO: run a Levenshtein distance on the two entries and decide + # what to do. Options should be to merge/replace or keep both. + # In the latter case the id must be modified (like id-$counter) + + old = lowercase(get(oldentry,"title","")) + new = lowercase(get(entry,"title","-")) + differencepercent = compare(TokenMax(RatcliffObershelp()), old, new) + if differencepercent > 0.8 + @hook :duplicated_paper + warn("The old and the new seems to be the same paper\n + (similarity is $(differencepercent))") + @show json(oldentry) + @show json(entry) + else + @hook :similar_paper + info("keeping both") + id = nextidnumber(bibliography,id) + end + end + bibliography[id] = entry + + @hook :after_add_to_bib +end + +""" Set `newbib` as the bibliography, replacing the contents of `bibliography`.
+""" +function setbibliography!(bibliography::Bibliography, newbib::Bibliography) + empty!(bibliography) + merge!(bibliography, newbib) +end + +import Base.pop! +function pop!(bibliography::Bibliography, key::String) + copy = Dict(bibliography) + p = Base.pop!(copy, key) # FIXME: remove this Base. + empty!(bibliography) + foreach(k -> bibliography[k] = copy[k], keys(copy)) + # Behaves like a real `pop`, returning the popped key value + p +end + +# FIXME: find files? +"""Add all entries in a bibtex string to the current bibliography""" +function addbibtex!(bibliography, bibstring) + bibs = Bibliography(bibstring) + foreach(bib -> addtobibliography!(bibliography, createcitekey(bibs[bib]), bibs[bib]), + keys(bibs)) + "ok" +end + +function splitauthors(bib::Citation) + authors = split(bib["author"], " and ") + map(a -> (split(strip(a, ['}', '{']), ", ")), + authors) +end + +"""Create the cite key given a bibtex object""" +function createcitekey(bib::Citation) + authors = splitauthors(bib) + firstauthor = length(authors) > 0 ? authors[1][1] : "Unnamed" + title = split(get(bib, "title", "Untitled"), " ")[1] + year = match(r"([0-9]{4})", get(bib, "year", get(bib, "date", "0000")))[1] + + string(firstauthor, "_", title, "_", year) |> lowercase +end + +"""Generates a new id, with the same key, but with a number at the end +(id-\$number) so that it is unique in the library. Fallback for +duplicates. """ +function nextidnumber(bibliography::Bibliography,id::String) + num = 0 + nid = string(id, "-", num) # assigned at function scope so it is still defined at the return below + while nid in keys(bibliography) + num += 1 + nid = string(id, "-", num) + end + nid +end + +# """ +# Apply some cleaning to the bibtex citation, namely fixing the key id.
+ +# Useful when adding a bibtex directly to verify that it conforms to our "standard" +# """ +# function cleancitation(cit) + +# end diff --git a/src/configs.jl b/src/configs.jl new file mode 100644 index 0000000..10b52c4 --- /dev/null +++ b/src/configs.jl @@ -0,0 +1,5 @@ +"""This function creates the path of all the libraries in the "libraries" variable""" +function createlibraries(libraries::Dict) + foreach(library -> mkpath(expanduser(libraries[library]["path"])), + keys(libraries)) +end diff --git a/src/default.jl b/src/default.jl new file mode 100644 index 0000000..3b492a7 --- /dev/null +++ b/src/default.jl @@ -0,0 +1,71 @@ +#= + +This file defines the default config parameters and provides a simple +way of creating a template config file. + +=# + +defaultconfig = """ +\"\"\"Custom functions to run in certain circumstances. +Read the documentation to know which hooks are available.\"\"\" +Scjrm.sethooks!(Dict{Symbol,Function}()) +""" + +# TODO: you can use merge to have customization + defaults + +eval(defaultconfig) + +""" + Writes the config to the file `name` +""" +function writeconfig(name::String) + open(name, "w") do f + write(f, defaultconfig) + end +end + +""" +Set the `hooks`. If you want to remove all the hooks and then set +the new one, you should call `empty!(Scjrm.hooks)` before. +""" +function sethooks!(h::Dict) + merge!(hooks, h) +end + +""" + Register `f` as the hook named `s` in `hooks`, replacing any hook already stored under `s`. +""" +addhook!(s::Symbol, f::Function) = hooks[s] = f + +""" + Remove `hook` from `hooks` +""" +function rmhook!(s::Symbol) + # `hooks` is a `const`ant. So we: + # copy `hooks` + tmphooks = deepcopy(hooks) + # Remove `s` from the copy + pop!(tmphooks, s) + # Clear `hooks` + empty!(hooks) + # Merge the copy to the empty `hooks` + merge!(hooks, tmphooks) +end + +function setlibraries!(l::Dict) + merge!(libraries,l) +end + +"""Change the current library. + +If `existing` is false, don't verify the new library exists.
+Returns the new library""" +function currentlibrary!(l::String; existing = true) + global currentlibrary, libraries + + if !existing || l in keys(libraries) # `existing = false` skips the membership check, as documented + currentlibrary = l + end + + currentlibrary +end diff --git a/src/globals.jl b/src/globals.jl new file mode 100644 index 0000000..16d64b5 --- /dev/null +++ b/src/globals.jl @@ -0,0 +1,10 @@ +using HttpCommon + +function makeheaders() + headers = HttpCommon.headers() + headers["X-Zotero-Version"] = "5.0.25" + headers["X-Zotero-Connector-API-Version"] = "2" + return headers +end + +const headers = makeheaders() diff --git a/src/macros.jl b/src/macros.jl new file mode 100644 index 0000000..90c5916 --- /dev/null +++ b/src/macros.jl @@ -0,0 +1,17 @@ +macro hook(hookname) + return quote + try + $hookname in keys(hooks) && hooks[$hookname]() + nothing + end + end +end + +macro hook(hookname, args) + return quote + try + $hookname in keys(hooks) && hooks[$hookname]($(esc(args))...) # esc: evaluate the argument tuple in the caller's scope, not as globals of this module + nothing + end + end +end diff --git a/src/server.jl b/src/server.jl new file mode 100644 index 0000000..e3a33d5 --- /dev/null +++ b/src/server.jl @@ -0,0 +1,45 @@ +#= +This server is a port of + +https://raw.githubusercontent.com/zotero/zotero/master/chrome/content/zotero/xpcom/server_connector.js +=# +using Mux, JSON +using HttpCommon: parsequerystring +using URIParser: unescape_form + +out = nothing + +@app connector = ( + Mux.defaults, + page("/connector/ping", x -> setzoteroheaders(pong(x)...)), + page("/connector/getSelectedCollection", x -> setzoteroheaders(collection(x, libraries, currentlibrary)...)), + page("/connector/saveSnapshot", x -> setzoteroheaders(savesnapshot(x, libraries, currentlibrary)...)), + page("/connector/saveItems", x -> setzoteroheaders(saveitems(x, libraries, currentlibrary)...)), + page("ui", x -> """ + Insert the bibtex here
+
+ +
+ +
+ """), + page("add", x -> addbibtex!(bibliography, + (x[:data] |> String |> parsequerystring)["field"] + |> unescape_form)), + page("/cli/library/:library", x -> begin + currentlibrary!(String(x[:params][:library])) + # Prevent copying the old bibliography over! + empty!(bibliography) + end), + Mux.notfound()) + +""" + Starts the `zotero`-compatible connector. + +It's a web server running (by default) on port 23199 that listens for connection from the zotero web plugin. +""" +function serve(; host=IPv4(127,0,0,1), port=23119, args...) + Mux.serve(connector, host = host, port = port, args...) +end + +keep_running() = while !isinteractive(); sleep(100); end diff --git a/src/zotero.jl b/src/zotero.jl new file mode 100644 index 0000000..2c314ca --- /dev/null +++ b/src/zotero.jl @@ -0,0 +1,232 @@ +using JSON, BibTeX +# Downloading +using URIParser, HTTP +#= +Mapping between zotero and BibTeX. +=# + +# https://github.com/retorquere/zotero-better-bibtex/blob/master/translators/Better%20BibLaTeX.ts + +"Map zotero reference types to bibtex" +const zotero_type_map = Dict( + "artwork" => "artwork", + "audioRecording" => "audio", + "bill" => "legislation", + "blogPost" => "online", + "book" => "book", + "bookSection" => "incollection", + "case" => "jurisdiction", + "computerProgram" => "software", + "conferencePaper" => "inproceedings", + "dictionaryEntry" => "inreference", + "document" => "misc", + "email" => "letter", + "encyclopediaArticle" => "misc", # "inreference", + "film" => "movie", + "forumPost" => "online", + "hearing" => "jurisdiction", + "instantMessage" => "misc", + "interview" => "misc", + "journalArticle" => "article", + "letter" => "letter", + "magazineArticle" => "article", + "manuscript" => "unpublished", + "map" => "misc", + "newspaperArticle" => "article", + "patent" => "patent", + "podcast" => "audio", + "presentation" => "unpublished", + "radioBroadcast" => "audio", + "report" => "report", + "statute" => "legislation", + "thesis" => "thesis", + 
"tvBroadcast" => "video", + "videoRecording" => "video", + "webpage" => "online" +) +# , subtype: "magazine"}, +# , subtype: "newspaper"}, + + +"Map zotero fields to bibtex fields" +const zotero_field_map = Dict( + "title" => "title", + "pages" => "pages", + "DOI" => "DOI", + "ISSN" => "ISSN", + "publisher" => "publisher", + "publicationTitle" => "journal", + "date" => "year", + "volume" => "volume", + "issue" => "number" + # FIXME: ADD TAGS? +) + +"Parses the `creatorType` field and creates a string with authors" +function creatortoauthor(c::Dict) + ctype = c["creatorType"] + if ctype != "author" + error("Add this creator type($(ctype)) to the supported ones!") + end + """{$(c["lastName"]), $(c["firstName"])}""" +end + +"Takes all zotero creators, converts and join them" +authors(c) = join(creatortoauthor.(c), " and ") + +#= +Functions used to interface with the zotero connector +=# + +"""Creates the response by using the response body `res`, the default + `headers` and the headers to add (`extraheaders`). + +Defines the hook `set_headers` (before setting them) and +`post_headers_merge` (before sending the response) +""" +function setzoteroheaders(res, extraheaders::Dict) + @hook :set_headers + newheaders = merge(headers, extraheaders) + @hook :post_headers_merge (newheaders,) + Dict(:headers => newheaders, :body => res) +end + +""" + The zotero connector sometimes pings the program to see if we are listening (e.g., before sending the data). This must support both GET and POST. 
+ +Defines the hook `ping` +""" +function pong(app) + @hook :ping + if app[:method] == "POST" + headers = Dict("Content-Type" => "application/json") + # FIXME: get(libraries[currentlibrary],"autosnapshot", false) + response = json(Dict("prefs" => + Dict("automaticSnapshots" => + false + ))) + else + headers = Dict("Content-Type" => "text/html") + response = """ + + Zotero Connector Server is Available + Zotero Connector Server is Available""" + end + + response, headers +end + +"""Return a JSON description of the current library together with a JSON `Content-Type` header. + +Defines the hook `get_collection` +""" +function collection(any, libraries, currentlibrary) + @hook :get_collection + libraryName = "developing" # NOTE(review): assigned but never used; the response below hard-codes "default" + + json(Dict( + "libraryID" => 1, + "libraryName" => "default", + "libraryEditable" => !libraries[currentlibrary]["readonly"], + "editable" => true, # collection-level parameters + "id" => 1, # collection-level + "name" => currentlibrary + )), Dict("Content-Type" => "application/json") +end + +""" + Save page snapshot. + +Defines the hooks `pre_save_snapshot` and `post_save_snapshot`. +""" +function savesnapshot(req, libraries, currentlibrary) + @hook :pre_save_snapshot + if libraries[currentlibrary]["readonly"] + warn("Library is readonly") + return "no", Dict() + end + + parsed = JSON.parse(String(req[:data])) + if !parsed["skipSnapshot"] + info("Saving page snapshot") + open("./devel/snapshot.html", "w") do f + write(f, parsed["html"]) + end + else + info("NOT saving page snapshot") + end + @hook :post_save_snapshot + "savesnapshot", Dict() +end + +""" + Download (asynchronously) one attachment of the item `id` and return the path it is written to.
+ +Defines `pre_get_attachment` and `post_get_attachment` +""" +function getattachment(id, attachment) + @hook :pre_get_attachment + title = attachment["title"] + url = attachment["url"] + + mime = nothing + if "mimeType" in keys(attachment) + mime = MIME(attachment["mimeType"]) + end + + name = basename(URI(url).path) + name == "" && (name = title * ".html") + path = expanduser(libraries[currentlibrary]["path"]) + mkpath("$path/$id") + file = "$path/$id/$name" + try + # FIXME: we should tell zotero-connector which download succeeded and which didn't + @spawn HTTP.open("GET", url) do resource + open(file, "w") do file + write(file, resource) + end + @hook :post_get_attachment + end + end + file +end + +function saveitems(req::Dict, libraries, currentlibrary) + @hook :pre_receive + if libraries[currentlibrary]["readonly"] + warn("Library is readonly") + return "no", Dict{String,String}("status"=> string(403)) + end + + parsed = JSON.parse(String(req[:data])) + + foreach(i -> addtobibliography!(bibliography, i...), # each `i` is the (id, data) tuple returned by `parseitem` + parseitem.(parsed["items"])) + + @hook :after_receive + + "ok", Dict{String,String}("status"=> string(201)) +end + + +function parseitem(item::Dict) + tp = zotero_type_map[item["itemType"]] # NOTE(review): an unknown itemType raises KeyError here, before the `try` below is reached + data = BibTeX.Citation{Symbol(tp)}() + data["author"] = authors(item["creators"]) + + for k in keys(item) + if k in keys(zotero_field_map) + data[zotero_field_map[k]] = item[k] + end + end + + id = createcitekey(data) + filenames = getattachment.(id, item["attachments"]) + data["file"] = join(filenames, "; ") + try + (id,data) + catch + warn("Type "* item["itemType"] * " does not exists!") + (nothing, nothing) # NOTE(review): `saveitems` splats this into `addtobibliography!`, whose signature requires a String and a Citation + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 2c45040..54652e9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,9 @@ -using Scjrm +using Scjrm: createlibraries using Base.Test -# write your own tests here -@test 1 == 2 +testlibs = Dict("test" => Dict("path" => "/tmp/examplefolderscjrm")) +isfile("/tmp/examplefolderscjrm") &&
rm("/tmp/examplefolderscjrm") + +createlibraries(testlibs) +@test isdir("/tmp/examplefolderscjrm") +isdir("/tmp/examplefolderscjrm") && rm("/tmp/examplefolderscjrm")