guix-devel/guix/import/cran.scm

252 lines
8.9 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2015 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix import cran)
#:use-module (ice-9 match)
#:use-module (ice-9 regex)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:use-module (sxml simple)
#:use-module (sxml match)
#:use-module (sxml xpath)
#:use-module (guix http-client)
#:use-module (guix hash)
#:use-module (guix store)
#:use-module (guix base32)
#:use-module ((guix download) #:select (download-to-store))
#:use-module (guix import utils)
#:use-module (guix upstream)
#:use-module (guix packages)
#:export (cran->guix-package
%cran-updater))
;;; Commentary:
;;;
;;; Generate a package declaration template for the latest version of an R
;;; package on CRAN, using the HTML description downloaded from
;;; cran.r-project.org.
;;;
;;; Code:
(define string->license
  ;; Translate a CRAN license specification into the expression to put in the
  ;; `license' field of a generated package definition.
  ;;
  ;; The argument is either a single license string or a list of such
  ;; strings.  The result is a symbol naming a license variable, a
  ;; `(list ...)' form when several licenses apply, or #f when the
  ;; specification is not recognized.
  (match-lambda
   ("AGPL-3" 'agpl3+)
   ("Artistic-2.0" 'artistic2.0)
   ("Apache License 2.0" 'asl2.0)
   ("BSD_2_clause" 'bsd-2)
   ("BSD_3_clause" 'bsd-3)
   ;; Return the *form* `(list gpl2+ gpl3+)', like the list clause below
   ;; does.  Returning the evaluated list `(gpl2+ gpl3+)' would read as a
   ;; procedure call once spliced into the package expression.
   ("GPL" '(list gpl2+ gpl3+))
   ("GPL (>= 2)" 'gpl2+)
   ("GPL (>= 3)" 'gpl3+)
   ("GPL-2" 'gpl2+)
   ("GPL-3" 'gpl3+)
   ("LGPL-2" 'lgpl2.0+)
   ("LGPL-2.1" 'lgpl2.1+)
   ("LGPL-3" 'lgpl3+)
   ("LGPL (>= 2)" 'lgpl2.0+)
   ("LGPL (>= 3)" 'lgpl3+)
   ("MIT" 'x11)
   ("MIT + file LICENSE" 'x11)
   ;; A one-element list is treated like its sole element.
   ((x) (string->license x))
   ;; Several licenses: build a `(list LICENSE ...)' form.
   ((lst ...) `(list ,@(map string->license lst)))
   ;; Anything else is unknown.
   (_ #f)))
(define (format-inputs names)
  "Return the package inputs for NAMES, a list of package name strings, as a
list of (NAME (unquote SYMBOL)) entries sorted case-insensitively by name."
  (define (name->input name)
    ;; ,'unquote yields the literal symbol `unquote', so each entry prints
    ;; as ("name" ,name) in the generated package expression.
    `(,name (,'unquote ,(string->symbol name))))

  (let ((ordered (sort names string-ci<?)))
    (map name->input ordered)))
(define* (maybe-inputs package-inputs #:optional (type 'inputs))
  "Return a list holding the TYPE field of a package definition built from
PACKAGE-INPUTS, a list of package names, or the empty list when
PACKAGE-INPUTS is empty.  TYPE defaults to 'inputs."
  (match package-inputs
    (()
     ;; Nothing to declare: the caller splices in no field at all.
     (list))
    ((names ...)
     ;; Build (TYPE (quasiquote INPUTS)), wrapped in a list for splicing.
     (list (list type
                 (list 'quasiquote (format-inputs names)))))))
(define (table-datum tree label)
  "Return a node set holding the datum cell that follows LABEL in the sxml
table TREE.  Only the first cell of a table row is considered a label cell."
  (let* ((up (node-parent tree))
         (select-datum
          (sxpath `(xhtml:tr
                    (xhtml:td 1)        ; only first cell can contain label
                    (equal? ,label)     ; text node equal to LABEL
                    ,up                 ; go up to label cell
                    ,up                 ; go up to matching row
                    (xhtml:td 2))))     ; select second cell
         (keep-first (node-pos 1)))
    (keep-first (select-datum tree))))
;; Base URL of the per-package pages on CRAN.  A package name is appended to
;; it to build the URL of the page to fetch, and the default home page of
;; generated packages.
(define %cran-url "http://cran.r-project.org/web/packages/")
(define (cran-fetch name)
  "Return an sxml representation of the CRAN page for the R package NAME,
or #f on failure.  NAME is case-sensitive."
  ;; This API always returns the latest release of the module.
  (let ((cran-url (string-append %cran-url name "/")))
    ;; Any error while fetching or parsing the page is turned into #f.
    (false-if-exception
     (let ((page (http-fetch cran-url)))
       (dynamic-wind
         (lambda () #t)
         (lambda ()
           (xml->sxml page
                      #:trim-whitespace? #t
                      #:namespaces '((xhtml . "http://www.w3.org/1999/xhtml"))
                      ;; Called for entities the XML parser does not know.
                      #:default-entity-handler
                      (lambda (port entity)
                        (case entity
                          ((nbsp) " ")
                          ((ge) ">=")
                          ((gt) ">")
                          ((lt) "<")
                          (else
                           ;; Warn, then fall back to the entity's name.
                           (format (current-warning-port)
                                   "~a:~a:~a: undefined entity: ~a\n"
                                   cran-url (port-line port) (port-column port)
                                   entity)
                           (symbol->string entity))))))
         ;; Release the port whether or not parsing succeeded.
         (lambda ()
           (close-port page)))))))
(define (downloads->url downloads)
  "Return the download URL found in DOWNLOADS, the downloads item of the CRAN
sxml tree, as a mirror://cran/ URL."
  (let* ((source-cell (table-datum downloads " Package source: "))
         ;; The href of the first link in the cell.
         (hrefs ((sxpath '((xhtml:a 1) @ href *text*)) source-cell))
         (relative (string-join hrefs))
         ;; Remove double dots, because we want an absolute path.
         (path (regexp-substitute/global #f "\\.\\./" relative
                                         'pre 'post)))
    (string-append "mirror://cran/" path)))
(define (nodes->text nodeset)
  "Return the text nodes found anywhere in NODESET, joined by single spaces."
  (let* ((select-text (sxpath '(// *text*)))
         (strings (select-text nodeset)))
    (string-join strings " ")))
(define (cran-sxml->sexp sxml)
  "Return the `package' s-expression for a CRAN package from the SXML
representation of the package page.  The source tarball is downloaded to the
store so that its hash can be recorded in the result."
  (define (guix-name name)
    ;; Lower-case NAME and make sure it carries the "r-" prefix.
    (if (string-prefix? "r-" name)
        (string-downcase name)
        (string-append "r-" (string-downcase name))))

  ;; Destructure the page: a heading of the form "NAME: SYNOPSIS", a
  ;; description paragraph, the summary table, and the downloads table.
  (sxml-match-let*
   (((*TOP* (xhtml:html
             ,head
             (xhtml:body
              (xhtml:h2 ,name-and-synopsis)
              (xhtml:p ,description)
              ,summary
              (xhtml:h4 "Downloads:") ,downloads
              . ,rest)))
     sxml))
   (let* ((name       (match:prefix (string-match ": " name-and-synopsis)))
          (synopsis   (match:suffix (string-match ": " name-and-synopsis)))
          (version    (nodes->text (table-datum summary "Version:")))
          (license    ((compose string->license nodes->text)
                       (table-datum summary "License:")))
          (home-page  (nodes->text ((sxpath '((xhtml:a 1)))
                                    (table-datum summary "URL:"))))
          (source-url (downloads->url downloads))
          (tarball    (with-store store (download-to-store store source-url)))
          (sysdepends (map match:substring
                           (list-matches
                            "[^ ]+"
                            ;; Strip off comma and parenthetical
                            ;; expressions.
                            (regexp-substitute/global
                             #f "(,|\\([^\\)]+\\))"
                             (nodes->text (table-datum summary
                                                       "SystemRequirements:"))
                             'pre 'post))))
          (imports    (map guix-name
                           ((sxpath '(// xhtml:a *text*))
                            (table-datum summary "Imports:")))))
     `(package
        (name ,(guix-name name))
        (version ,version)
        (source (origin
                  (method url-fetch)
                  ;; `version' is left as a symbol: it refers to the
                  ;; generated package's own version field.
                  (uri (cran-uri ,name version))
                  (sha256
                   (base32
                    ,(bytevector->nix-base32-string (file-sha256 tarball))))))
        (build-system r-build-system)
        ,@(maybe-inputs sysdepends)
        ,@(maybe-inputs imports 'propagated-inputs)
        ;; Fall back to the CRAN page when no URL is listed.
        (home-page ,(if (string-null? home-page)
                        (string-append %cran-url name)
                        home-page))
        (synopsis ,synopsis)
        ;; Use double spacing: a period followed by one space and a word
        ;; gets two spaces after it.
        (description ,(regexp-substitute/global #f "\\. \\b" description
                                                'pre ".  " 'post))
        (license ,license)))))
(define (cran->guix-package package-name)
  "Fetch the metadata for PACKAGE-NAME from cran.r-project.org, and return the
`package' s-expression corresponding to that package, or #f on failure."
  (let ((page (cran-fetch package-name)))
    ;; A failed fetch yields #f, which is passed straight through.
    (and page
         (cran-sxml->sexp page))))
;;;
;;; Updater.
;;;
(define (latest-release package)
  "Return an <upstream-source> for the latest release of PACKAGE, a package
name string, or #f if its CRAN page could not be fetched."
  (let* ((cran-name (if (string-prefix? "r-" package)
                        (string-drop package 2) ; strip the "r-" prefix
                        package))
         (page      (cran-fetch cran-name)))
    (if (not page)
        #f
        ;; Only the summary and downloads tables are used below.
        (sxml-match-let*
         (((*TOP* (xhtml:html
                   ,head
                   (xhtml:body
                    (xhtml:h2 ,name-and-synopsis)
                    (xhtml:p ,description)
                    ,summary
                    (xhtml:h4 "Downloads:") ,downloads
                    . ,rest)))
           page))
         (let ((version     (nodes->text (table-datum summary "Version:")))
               (tarball-url (downloads->url downloads)))
           ;; CRAN does not provide signatures.
           (upstream-source
            (package package)
            (version version)
            (urls (list tarball-url))))))))
(define (cran-package? package)
  "Return true if PACKAGE is an R package from CRAN, judging by the \"r-\"
prefix of its name."
  ;; Assume all R packages are available on CRAN.
  (let ((name (package-name package)))
    (string-prefix? "r-" name)))
;; Updater for CRAN packages: it applies to packages accepted by
;; `cran-package?' and finds their newest version with `latest-release'.
(define %cran-updater
  (upstream-updater
   (name 'cran)
   (description "Updater for CRAN packages")
   (pred cran-package?)                 ; which packages this updater handles
   (latest latest-release)))            ; how to find the latest release
;;; cran.scm ends here