First almost working version. Never tried REQUIRE, tests are missing

master
nico202 2018-05-04 08:23:06 +02:00
parent 4973c6d99c
commit 12866bc20e
11 changed files with 525 additions and 4 deletions

View File

@ -1 +1,7 @@
julia 0.6
HTTP
JSON
HttpCommon
URIParser
# BibTeX
# BibTeXFormat

12
bugs.org Normal file
View File

@ -0,0 +1,12 @@
* List of bugs seen that must be fixed
** TODO Date year saved as 1996/09
What to do?
- Should it automatically understand that the part after the year is a month?
- Or should the extra part just be filtered out?
** Similar ids
Those three are the same:
- srinivasan12t_haptics_1997
- srinivasan_haptics_1997
- srinivasanlzl_haptics_1997
On which field should we check the distance? Which threshold? How to merge fields?

View File

@ -1,5 +1,20 @@
__precompile__()
# Top-level module of the package: declares the shared global state and then
# includes every source file.
module Scjrm
# package code goes here
using BibTeX
# Registry read by the `@hook` macro and filled through `sethooks!`/`addhook!`.
const hooks = Dict()
# Known libraries; other files read the "path" and "readonly" entries of each value.
const libraries = Dict()
# Key into `libraries` of the active library. Not `const`: `currentlibrary!` rebinds it.
currentlibrary = ""
# The single bibliography instance that the server routes mutate in place.
const bibliography = Bibliography("")
# NOTE(review): the files are evaluated in this order; macros.jl is included
# before zotero.jl and bibliography.jl, which both use `@hook` in their bodies.
include("default.jl")
include("server.jl")
include("macros.jl")
include("globals.jl")
include("configs.jl")
include("zotero.jl")
include("bibliography.jl")
end # module

104
src/bibliography.jl Normal file
View File

@ -0,0 +1,104 @@
#=
Functions to manage the library
=#
using JSON, StringDistances
"""
    addtobibliography!(bibliography, id, entry)

Store `entry` in `bibliography` under the key `id`.

When `id` is already taken, the lowercased titles of the stored and the new
entry are compared. A similarity above 0.8 is reported as a duplicate (both
entries are printed) and the stored entry is overwritten; otherwise the new
entry is kept next to the old one under a fresh key from `nextidnumber`.

Fires the hooks `pre_add_to_bib`, `duplicated_paper`, `similar_paper` and
`after_add_to_bib`.
"""
function addtobibliography!(bibliography::Bibliography, id::String, entry::Citation)
    @hook :pre_add_to_bib
    info("Here")
    exists = id in keys(bibliography)
    negation = exists ? "" : "not "
    info("The id you are adding $(id) does " * negation * "exists")
    if exists
        oldentry = bibliography[id]
        # TODO: run a Levenshtein distance on the two entries and decide what
        # to do. Options should be to merge/replace or keep both; in the
        # latter case the id must be modified (like id-counter).
        oldtitle = lowercase(get(oldentry, "title", ""))
        newtitle = lowercase(get(entry, "title", "-"))
        similarity = compare(TokenMax(RatcliffObershelp()), oldtitle, newtitle)
        if !(similarity > 0.8)
            @hook :similar_paper
            info("keeping both")
            id = nextidnumber(bibliography, id)
        else
            @hook :duplicated_paper
            warn("The old and the new seems to be the same paper\n\n(similarity is $(similarity))")
            # The variable names are part of what `@show` prints.
            @show json(oldentry)
            @show json(entry)
        end
    end
    bibliography[id] = entry
    @hook :after_add_to_bib
end
"""
    setbibliography!(bibliography, newbib)

Replace the content of `bibliography` with the entries of `newbib`: the
target is emptied first and `newbib` is then merged into it.
"""
function setbibliography!(bibliography::Bibliography, newbib::Bibliography)
    empty!(bibliography)
    return merge!(bibliography, newbib)
end
import Base.pop!
"""
    pop!(bibliography::Bibliography, key::String)

Remove `key` from `bibliography` and return the value that was stored under it.

The removal is done on a `Dict` snapshot of the bibliography; the
bibliography is then emptied and refilled from what is left in the snapshot.
"""
function pop!(bibliography::Bibliography, key::String)
    remaining = Dict(bibliography)
    removed = Base.pop!(remaining, key) # FIXME: remove this Base.
    empty!(bibliography)
    for k in keys(remaining)
        bibliography[k] = remaining[k]
    end
    # Behaves like a real `pop!`: hand back the value of the removed key.
    return removed
end
# FIXME: find files?
"""
    addbibtex!(bibliography, bibstring)

Parse `bibstring` with `Bibliography` and add each parsed entry to
`bibliography` through `addtobibliography!`, under the key computed by
`createcitekey`. Returns the string `"ok"`.
"""
function addbibtex!(bibliography, bibstring)
    parsed = Bibliography(bibstring)
    for key in keys(parsed)
        addtobibliography!(bibliography, createcitekey(parsed[key]), parsed[key])
    end
    return "ok"
end
"""
    splitauthors(bib::Citation)

Split the `"author"` field of `bib` on `" and "`, strip surrounding braces
from every piece and split each piece again on `", "`.
Returns one array of name parts per author.
"""
function splitauthors(bib::Citation)
    return [split(strip(name, ['}', '{']), ", ") for name in split(bib["author"], " and ")]
end
"""
    createcitekey(bib::Citation)

Create the cite key of a bibtex entry as `firstauthor_firstword_year`,
all lowercase.

- `firstauthor` is the first name part of the first author returned by
  `splitauthors`, or `"Unnamed"` when the entry has no (or an empty) author.
- `firstword` is the first space-separated word of the title, with
  `"Untitled"` as the title of entries that have none.
- `year` is the first run of four digits found in the `"year"` field (or in
  `"date"` when there is no `"year"`), or `"0000"` when there is none.
"""
function createcitekey(bib::Citation)
    # `split` never returns an empty array, so the absence of an author has to
    # be detected on the raw field rather than on the length of the split.
    firstauthor = "Unnamed"
    if !isempty(strip(get(bib, "author", "")))
        candidate = splitauthors(bib)[1][1]
        isempty(candidate) || (firstauthor = candidate)
    end
    title = split(get(bib, "title", "Untitled"), " ")[1]
    # `match` returns `nothing` for values such as "n.d."; fall back to the
    # same placeholder that is used when the field is missing.
    yearmatch = match(r"([0-9]{4})", get(bib, "year", get(bib, "date", "0000")))
    year = yearmatch === nothing ? "0000" : yearmatch[1]
    return lowercase(string(firstauthor, "_", title, "_", year))
end
"""
    nextidnumber(bibliography, id)

Generate a new id of the form `id-N`, where `N` is the smallest integer
counting up from 0 for which `id-N` is not yet a key of `bibliography`.
Fallback for duplicates.

`bibliography` can be any collection that supports `keys`; it is not modified.
"""
function nextidnumber(bibliography, id::AbstractString)
    taken = keys(bibliography)
    num = 0
    # The candidate is declared outside the loop: a variable first assigned
    # inside a `while` body is local to the loop and unavailable afterwards.
    nid = string(id, "-", num)
    while nid in taken
        num += 1
        nid = string(id, "-", num)
    end
    return nid
end
# """
# Apply some cleaning to the bibtex citation, namely fixing the key id.
# Useful when adding a bibtex directly to verify that it conforms to our "standard"
# """
# function cleancitation(cit)
# end

5
src/configs.jl Normal file
View File

@ -0,0 +1,5 @@
"""
    createlibraries(libraries::Dict)

Create on disk the directory stored under the `"path"` entry of every library
in `libraries`; `~` is expanded and missing parent directories are created.
"""
function createlibraries(libraries::Dict)
    for name in keys(libraries)
        mkpath(expanduser(libraries[name]["path"]))
    end
    return nothing
end

71
src/default.jl Normal file
View File

@ -0,0 +1,71 @@
#=
This file defines the default config parameters and provides a simple
way of creating a template config file.
=#
# Text of the template config file written by `writeconfig`: Julia source
# that calls `Scjrm.sethooks!` with an empty hook table.
defaultconfig = """
\"\"\"Custom functions to run in certain circumstances.
Read the documentation to know which hooks are available.\"\"\"
Scjrm.sethooks!(Dict{Symbol,Function}())
"""
# TODO: you can use merge to have customization + defaults
# NOTE(review): `defaultconfig` is a `String`, so this `eval` only returns the
# string; the `Scjrm.sethooks!` call inside the template is not executed
# here. Confirm whether the template was meant to be parsed and run.
eval(defaultconfig)
"""
    writeconfig(name::String)

Open the file `name` for writing and fill it with the `defaultconfig` template.
"""
function writeconfig(name::String)
    return open(io -> write(io, defaultconfig), name, "w")
end
"""
    sethooks!(h::Dict)

Merge the hooks in `h` into `hooks`; hooks already registered under other
names are kept. To start from scratch, call `empty!(Scjrm.hooks)` first.
"""
sethooks!(h::Dict) = merge!(hooks, h)
"""
    addhook!(s::Symbol, f::Function)

Register `f` in `hooks` under the name `s`, replacing whatever was stored
under that name. Returns `f`.
"""
function addhook!(s::Symbol, f::Function)
    # Use the requested name, not a fixed one, as the key.
    hooks[s] = f
    return f
end
"""
    rmhook!(s::Symbol)

Remove the hook registered under `s` from `hooks` and return `hooks`.
Throws a `KeyError` when no hook is registered under `s`.
"""
function rmhook!(s::Symbol)
    # `const` only fixes which object `hooks` is bound to; the dictionary
    # itself is mutable, so the entry can be removed in place.
    pop!(hooks, s)
    return hooks
end
"""
    setlibraries!(l::Dict)

Merge the library definitions in `l` into `libraries`.
"""
setlibraries!(l::Dict) = merge!(libraries, l)
"""
    currentlibrary!(l::String; existing = true)

Change the current library to `l`.

With `existing = true` (the default) the change only happens when `l` is a
key of `libraries`; with `existing = false` that check is skipped.
Returns the current library after the call, which is the previous one when
the change was refused.
"""
function currentlibrary!(l::String; existing = true)
    global currentlibrary
    if !existing || l in keys(libraries)
        currentlibrary = l
    end
    return currentlibrary
end

10
src/globals.jl Normal file
View File

@ -0,0 +1,10 @@
using HttpCommon
"""
    makeheaders()

Build the default response headers: the headers returned by
`HttpCommon.headers()` plus the two `X-Zotero-*` version headers.
"""
function makeheaders()
    zoteroheaders = HttpCommon.headers()
    for (field, value) in (("X-Zotero-Version", "5.0.25"),
                           ("X-Zotero-Connector-API-Version", "2"))
        zoteroheaders[field] = value
    end
    return zoteroheaders
end
# Default headers, computed once when the file is loaded.
const headers = makeheaders()

17
src/macros.jl Normal file
View File

@ -0,0 +1,17 @@
"""
    @hook hookname
    @hook hookname args

Call the function registered in `hooks` under `hookname`, if there is one.
`args` must be an expression that evaluates to a tuple (or another iterable);
its elements are passed to the hook as positional arguments, for example
`@hook :post_headers_merge (newheaders,)`.

The expansion always evaluates to `nothing`. A hook that throws does not
interrupt the caller: the error is reported with `warn` and then dropped.
"""
macro hook(hookname)
    return quote
        try
            # `esc` makes the name expression evaluate in the caller's scope.
            callback = get(hooks, $(esc(hookname)), nothing)
            callback === nothing || callback()
            nothing
        catch err
            warn("Hook ", $(string(hookname)), " failed: ", err)
            nothing
        end
    end
end
macro hook(hookname, args)
    return quote
        try
            callback = get(hooks, $(esc(hookname)), nothing)
            # Without `esc` the variables inside `args` would be looked up as
            # globals of this module instead of the caller's local variables.
            callback === nothing || callback($(esc(args))...)
            nothing
        catch err
            warn("Hook ", $(string(hookname)), " failed: ", err)
            nothing
        end
    end
end

45
src/server.jl Normal file
View File

@ -0,0 +1,45 @@
#=
This server is a port of
https://raw.githubusercontent.com/zotero/zotero/master/chrome/content/zotero/xpcom/server_connector.js
=#
using Mux, JSON
using HttpCommon: parsequerystring
using URIParser: unescape_form
# NOTE(review): `out` is not referenced anywhere else in the visible source;
# possibly a debugging leftover. Confirm before removing.
out = nothing
# Route table of the connector server. The "/connector/..." routes splat the
# handler's return value into `setzoteroheaders`; the "ui", "add" and
# "/cli/library/:library" routes return their value directly.
@app connector = (
Mux.defaults,
page("/connector/ping", x -> setzoteroheaders(pong(x)...)),
page("/connector/getSelectedCollection", x -> setzoteroheaders(collection(x, libraries, currentlibrary)...)),
page("/connector/saveSnapshot", x -> setzoteroheaders(savesnapshot(x, libraries, currentlibrary)...)),
page("/connector/saveItems", x -> setzoteroheaders(saveitems(x, libraries, currentlibrary)...)),
# HTML form whose "field" textarea is posted to "/add".
page("ui", x -> """
Insert the bibtex here</br>
<form action="/add" method="post">
<textarea name="field" cols="80" rows="15"></textarea>
</br>
<input type="submit" value="Submit">
</form>
"""),
# Parse the posted query string, unescape its "field" value and hand it to `addbibtex!`.
page("add", x -> addbibtex!(bibliography,
(x[:data] |> String |> parsequerystring)["field"]
|> unescape_form)),
# Switch the current library to the `:library` path parameter, then empty the shared bibliography.
page("/cli/library/:library", x -> begin
currentlibrary!(String(x[:params][:library]))
# Prevent copying the old bibliography over!
empty!(bibliography)
end),
Mux.notfound())
"""
    serve(; host=IPv4(127,0,0,1), port=23119, args...)

Starts the `zotero`-compatible connector.
It's a web server running (by default) on 127.0.0.1, port 23119, that listens
for connections from the zotero web plugin. Any further keyword argument is
forwarded to `Mux.serve` as a keyword argument.
"""
function serve(; host=IPv4(127,0,0,1), port=23119, args...)
    # The `;` keeps `args...` a keyword splat; after a plain comma the
    # collected keyword pairs would be passed as positional arguments.
    return Mux.serve(connector; host = host, port = port, args...)
end
"""
    keep_running()

Block for as long as the session is not interactive, sleeping in steps of
100 seconds. Returns immediately in an interactive session.
"""
function keep_running()
    while !isinteractive()
        sleep(100)
    end
end

232
src/zotero.jl Normal file
View File

@ -0,0 +1,232 @@
using JSON, BibTeX
# Downloading
using URIParser, HTTP
#=
Mapping between zotero and BibTeX.
=#
# https://github.com/retorquere/zotero-better-bibtex/blob/master/translators/Better%20BibLaTeX.ts
# Keys are the `itemType` values looked up by `parseitem`; each value becomes
# the type parameter of the `BibTeX.Citation` created for the item.
"Map zotero reference types to bibtex"
const zotero_type_map = Dict(
"artwork" => "artwork",
"audioRecording" => "audio",
"bill" => "legislation",
"blogPost" => "online",
"book" => "book",
"bookSection" => "incollection",
"case" => "jurisdiction",
"computerProgram" => "software",
"conferencePaper" => "inproceedings",
"dictionaryEntry" => "inreference",
"document" => "misc",
"email" => "letter",
"encyclopediaArticle" => "misc", # "inreference",
"film" => "movie",
"forumPost" => "online",
"hearing" => "jurisdiction",
"instantMessage" => "misc",
"interview" => "misc",
"journalArticle" => "article",
"letter" => "letter",
"magazineArticle" => "article",
"manuscript" => "unpublished",
"map" => "misc",
"newspaperArticle" => "article",
"patent" => "patent",
"podcast" => "audio",
"presentation" => "unpublished",
"radioBroadcast" => "audio",
"report" => "report",
"statute" => "legislation",
"thesis" => "thesis",
"tvBroadcast" => "video",
"videoRecording" => "video",
"webpage" => "online"
)
# , subtype: "magazine"},
# , subtype: "newspaper"},
# Only the item fields listed here are copied by `parseitem`; each one is
# stored in the citation under the mapped bibtex field name.
"Map zotero fields to bibtex fields"
const zotero_field_map = Dict(
"title" => "title",
"pages" => "pages",
"DOI" => "DOI",
"ISSN" => "ISSN",
"publisher" => "publisher",
"publicationTitle" => "journal",
# NOTE(review): `parseitem` copies the `date` value into `year` unchanged, so
# a value that is not a bare year ends up in `year` as is. Confirm whether it
# should be reduced to the year first.
"date" => "year",
"volume" => "volume",
"issue" => "number"
# FIXME: ADD TAGS?
)
"""
    creatortoauthor(c::Dict)

Format one zotero creator as a braced `{lastName, firstName}` string.
Only creators whose `"creatorType"` is `"author"` are supported; any other
type raises an error.
"""
function creatortoauthor(c::Dict)
    ctype = c["creatorType"]
    ctype == "author" || error("Add this creator type($(ctype)) to the supported ones!")
    return string("{", c["lastName"], ", ", c["firstName"], "}")
end
"""
    authors(c)

Convert every zotero creator in `c` with `creatortoauthor` and join the
results with `" and "`.
"""
function authors(c)
    formatted = creatortoauthor.(c)
    return join(formatted, " and ")
end
#=
Functions used to interface with the zotero connector
=#
"""
    setzoteroheaders(res, extraheaders::Dict)

Build the response dictionary: `:body` is `res` and `:headers` is the default
`headers` merged with `extraheaders` (entries of `extraheaders` win).

Fires the hook `set_headers` before the merge and `post_headers_merge` after
it, the latter with the tuple expression `(newheaders,)` as its arguments.
"""
function setzoteroheaders(res, extraheaders::Dict)
    @hook :set_headers
    newheaders = merge(headers, extraheaders)
    @hook :post_headers_merge (newheaders,)
    return Dict(:headers => newheaders, :body => res)
end
"""
    pong(app)

Answer a ping of the zotero connector. A `POST` request gets a JSON body with
the `automaticSnapshots` preference set to `false`; any other method gets a
small HTML page. Returns the body together with its `Content-Type` header.

Fires the hook `ping`.
"""
function pong(app)
    @hook :ping
    if app[:method] != "POST"
        page = """
        <!DOCTYPE html><html><head>
        <title>Zotero Connector Server is Available</title></head>
        <body>Zotero Connector Server is Available</body></html>"""
        return page, Dict("Content-Type" => "text/html")
    end
    # FIXME: get(libraries[currentlibrary],"autosnapshot", false)
    prefs = Dict("automaticSnapshots" => false)
    return json(Dict("prefs" => prefs)), Dict("Content-Type" => "application/json")
end
"""
    collection(any, libraries, currentlibrary)

Describe the current library as the selected zotero collection. Returns the
JSON body and its `Content-Type` header. The request (`any`) is not used.

A library without a `"readonly"` entry is reported as editable.

Fires the hook `get_collection`.
"""
function collection(any, libraries, currentlibrary)
    @hook :get_collection
    readonly = get(libraries[currentlibrary], "readonly", false)
    body = json(Dict(
        "libraryID" => 1,
        "libraryName" => "default",
        "libraryEditable" => !readonly,
        "editable" => true, # collection-level parameters
        "id" => 1, # collection-level
        "name" => currentlibrary
    ))
    return body, Dict("Content-Type" => "application/json")
end
"""
    savesnapshot(req, libraries, currentlibrary)

Save the page snapshot contained in the JSON body of `req`, unless the
request sets `skipSnapshot`. Returns the response body and an empty
dictionary of extra headers.

A read-only library refuses the request with the body `"no"`; a library
without a `"readonly"` entry counts as writable.

Fires the hooks `pre_save_snapshot` and `post_save_snapshot`.
"""
function savesnapshot(req, libraries, currentlibrary)
    @hook :pre_save_snapshot
    if get(libraries[currentlibrary], "readonly", false)
        warn("Library is readonly")
        return "no", Dict()
    end
    parsed = JSON.parse(String(req[:data]))
    if !parsed["skipSnapshot"]
        info("Saving page snapshot")
        # FIXME: the target is hard-coded and ignores the library path.
        snapshotfile = "./devel/snapshot.html"
        # `open` does not create missing directories.
        mkpath(dirname(snapshotfile))
        open(snapshotfile, "w") do f
            write(f, parsed["html"])
        end
    else
        info("NOT saving page snapshot")
    end
    @hook :post_save_snapshot
    return "savesnapshot", Dict()
end
"""
    getattachment(id, attachment)

Start the download of one attachment into the folder `id` of the current
library and return the path of the target file.

The file name is the last component of the URL path, or the attachment title
followed by `.html` when that component is empty. The download itself is
started with `@spawn`, so the returned path may not exist yet when this
function returns.

Fires `pre_get_attachment` on entry and `post_get_attachment` from the
spawned download once the file has been written.
"""
function getattachment(id, attachment)
    @hook :pre_get_attachment
    title = attachment["title"]
    url = attachment["url"]
    name = basename(URI(url).path)
    name == "" && (name = title * ".html")
    path = expanduser(libraries[currentlibrary]["path"])
    mkpath("$path/$id")
    file = "$path/$id/$name"
    try
        # FIXME: we should tell zotero-connector which is a success and which isn't
        @spawn HTTP.open("GET", url) do resource
            open(file, "w") do io
                write(io, resource)
            end
            @hook :post_get_attachment
        end
    catch err
        # Only failures to start the download end up here.
        warn("Could not start the download of $(url): $(err)")
    end
    return file
end
"""
    saveitems(req::Dict, libraries, currentlibrary)

Convert every item in the JSON body of `req` with `parseitem` and add the
results to `bibliography`. Returns the response body together with a
dictionary holding the status code as a string (`"201"` on success, `"403"`
when the library is read-only).

A library without a `"readonly"` entry counts as writable. Items for which
`parseitem` returns its `(nothing, nothing)` fallback are skipped.

Fires the hooks `pre_receive` and `after_receive`.
"""
function saveitems(req::Dict, libraries, currentlibrary)
    @hook :pre_receive
    if get(libraries[currentlibrary], "readonly", false)
        warn("Library is readonly")
        return "no", Dict{String,String}("status"=> string(403))
    end
    parsed = JSON.parse(String(req[:data]))
    # All items are converted first, as before, and only then stored.
    for (id, citation) in parseitem.(parsed["items"])
        id === nothing && continue
        addtobibliography!(bibliography, id, citation)
    end
    @hook :after_receive
    return "ok", Dict{String,String}("status"=> string(201))
end
"""
    parseitem(item::Dict)

Convert one zotero item into a `(id, citation)` pair.

The citation type comes from `zotero_type_map`, the authors from the item's
`"creators"`, and every item field listed in `zotero_field_map` is copied
under its bibtex name. The attachments are handed to `getattachment` and the
resulting paths are stored, joined with `"; "`, in the `"file"` field.

An item whose `"itemType"` is not in `zotero_type_map` is reported with a
warning and yields `(nothing, nothing)`. Missing `"creators"` or
`"attachments"` entries are treated as empty lists.
"""
function parseitem(item::Dict)
    itemtype = item["itemType"]
    # The lookup has to be guarded here; wrapping the final tuple in a `try`
    # can never catch an unknown type.
    if !haskey(zotero_type_map, itemtype)
        warn("Type "* itemtype * " does not exists!")
        return (nothing, nothing)
    end
    data = BibTeX.Citation{Symbol(zotero_type_map[itemtype])}()
    data["author"] = authors(get(item, "creators", []))
    for (field, value) in item
        haskey(zotero_field_map, field) || continue
        data[zotero_field_map[field]] = value
    end
    id = createcitekey(data)
    filenames = getattachment.(id, get(item, "attachments", []))
    data["file"] = join(filenames, "; ")
    return (id, data)
end

View File

@ -1,5 +1,9 @@
using Scjrm
using Scjrm: createlibraries
using Base.Test
# `createlibraries` must create the folder of every configured library.
testdir = "/tmp/examplefolderscjrm"
testlibs = Dict("test" => Dict("path" => testdir))
# Start from a clean slate, whether a previous run left a file or a folder.
rm(testdir; force = true, recursive = true)
createlibraries(testlibs)
@test isdir(testdir)
rm(testdir; force = true, recursive = true)