;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net> ;;; Copyright © 2015, 2016, 2017 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. (define-module (guix import cran) #:use-module (ice-9 match) #:use-module (ice-9 regex) #:use-module ((ice-9 rdelim) #:select (read-string read-line)) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) #:use-module (srfi srfi-41) #:use-module (ice-9 receive) #:use-module (web uri) #:use-module (guix memoization) #:use-module (guix http-client) #:use-module (guix hash) #:use-module (guix store) #:use-module (guix base32) #:use-module ((guix download) #:select (download-to-store)) #:use-module (guix import utils) #:use-module ((guix build utils) #:select (find-files)) #:use-module (guix utils) #:use-module ((guix build-system r) #:select (cran-uri bioconductor-uri)) #:use-module (guix upstream) #:use-module (guix packages) #:use-module (gnu packages) #:export (cran->guix-package bioconductor->guix-package recursive-import %cran-updater %bioconductor-updater cran-package? bioconductor-package? bioconductor-data-package? bioconductor-experiment-package?)) ;;; Commentary: ;;; ;;; Generate a package declaration template for the latest version of an R ;;; package on CRAN, using the DESCRIPTION file downloaded from ;;; cran.r-project.org. ;;; ;;; Code: (define string->license (match-lambda ("AGPL-3" 'agpl3+) ("Artistic-2.0" 'artistic2.0) ("Apache License 2.0" 'asl2.0) ("BSD_2_clause" 'bsd-2) ("BSD_2_clause + file LICENSE" 'bsd-2) ("BSD_3_clause" 'bsd-3) ("BSD_3_clause + file LICENSE" 'bsd-3) ("GPL" '(list gpl2+ gpl3+)) ("GPL (>= 2)" 'gpl2+) ("GPL (>= 3)" 'gpl3+) ("GPL-2" 'gpl2) ("GPL-3" 'gpl3) ("LGPL-2" 'lgpl2.0) ("LGPL-2.1" 'lgpl2.1) ("LGPL-3" 'lgpl3) ("LGPL (>= 2)" 'lgpl2.0+) ("LGPL (>= 3)" 'lgpl3+) ("MIT" 'expat) ("MIT + file LICENSE" 'expat) ((x) (string->license x)) ((lst ...) `(list ,@(map string->license lst))) (_ #f))) (define (description->alist description) "Convert a DESCRIPTION string into an alist." (let ((lines (string-split description #\newline)) (parse (lambda (line acc) (if (string-null? line) acc ;; Keys usually start with a capital letter and end with ;; ":". There are some exceptions, unfortunately (such ;; as "biocViews"). There are no blanks in a key. (if (string-match "^[A-Za-z][^ :]+:( |\n|$)" line) ;; New key/value pair (let* ((pos (string-index line #\:)) (key (string-take line pos)) (value (string-drop line (+ 1 pos)))) (cons (cons key (string-trim-both value)) acc)) ;; This is a continuation of the previous pair (match-let ((((key . value) . rest) acc)) (cons (cons key (string-join (list value (string-trim-both line)))) rest))))))) (fold parse '() lines))) (define (format-inputs names) "Generate a sorted list of package inputs from a list of package NAMES." (map (lambda (name) (list name (list 'unquote (string->symbol name)))) (sort names string-ci<?))) (define* (maybe-inputs package-inputs #:optional (type 'inputs)) "Given a list of PACKAGE-INPUTS, tries to generate the TYPE field of a package definition." (match package-inputs (() '()) ((package-inputs ...) `((,type (,'quasiquote ,(format-inputs package-inputs))))))) (define %cran-url "http://cran.r-project.org/web/packages/") (define %bioconductor-url "https://bioconductor.org/packages/") ;; The latest Bioconductor release is 3.6. Bioconductor packages should be ;; updated together. (define %bioconductor-version "3.6") (define %bioconductor-packages-list-url (string-append "https://bioconductor.org/packages/" %bioconductor-version "/bioc/src/contrib/PACKAGES")) (define (bioconductor-packages-list) "Return the latest version of package NAME for the current bioconductor release." (let ((url (string->uri %bioconductor-packages-list-url))) (guard (c ((http-get-error? c) (format (current-error-port) "error: failed to retrieve list of packages from ~s: ~a (~s)~%" (uri->string (http-get-error-uri c)) (http-get-error-code c) (http-get-error-reason c)) #f)) ;; Split the big list on empty lines, then turn each chunk into an ;; alist of attributes. (map (lambda (chunk) (description->alist (string-join chunk "\n"))) (chunk-lines (read-lines (http-fetch/cached url))))))) (define (latest-bioconductor-package-version name) "Return the version string corresponding to the latest release of the bioconductor package NAME, or #F if the package is unknown." (and=> (find (lambda (meta) (string=? (assoc-ref meta "Package") name)) (bioconductor-packages-list)) (cut assoc-ref <> "Version"))) (define (fetch-description repository name) "Return an alist of the contents of the DESCRIPTION file for the R package NAME in the given REPOSITORY, or #f in case of failure. NAME is case-sensitive." (case repository ((cran) (let ((url (string-append %cran-url name "/DESCRIPTION"))) (guard (c ((http-get-error? c) (format (current-error-port) "error: failed to retrieve package information \ from ~s: ~a (~s)~%" (uri->string (http-get-error-uri c)) (http-get-error-code c) (http-get-error-reason c)) #f)) (description->alist (read-string (http-fetch url)))))) ((bioconductor) ;; Currently, the bioconductor project does not offer a way to access a ;; package's DESCRIPTION file over HTTP, so we determine the version, ;; download the source tarball, and then extract the DESCRIPTION file. (let* ((version (latest-bioconductor-package-version name)) (url (car (bioconductor-uri name version))) (tarball (with-store store (download-to-store store url)))) (call-with-temporary-directory (lambda (dir) (parameterize ((current-error-port (%make-void-port "rw+")) (current-output-port (%make-void-port "rw+"))) (and (zero? (system* "tar" "--wildcards" "-x" "--strip-components=1" "-C" dir "-f" tarball "*/DESCRIPTION")) (description->alist (with-input-from-file (string-append dir "/DESCRIPTION") read-string)))))))))) (define (listify meta field) "Look up FIELD in the alist META. If FIELD contains a comma-separated string, turn it into a list and strip off parenthetic expressions. Return the empty list when the FIELD cannot be found." (let ((value (assoc-ref meta field))) (if (not value) '() ;; Strip off parentheses (let ((items (string-split (regexp-substitute/global #f "( *\\([^\\)]+\\)) *" value 'pre 'post) #\,))) (remove (lambda (item) (or (string-null? item) ;; When there is whitespace inside of items it is ;; probably because this was not an actual list to ;; begin with. (string-any char-set:whitespace item))) (map string-trim-both items)))))) (define default-r-packages (list "base" "compiler" "grDevices" "graphics" "grid" "methods" "parallel" "splines" "stats" "stats4" "tcltk" "tools" "translations" "utils")) (define (guix-name name) "Return a Guix package name for a given R package name." (string-append "r-" (string-map (match-lambda (#\_ #\-) (#\. #\-) (chr (char-downcase chr))) name))) (define (needs-fortran? tarball) "Check if the TARBALL contains Fortran source files." (define (check pattern) (parameterize ((current-error-port (%make-void-port "rw+")) (current-output-port (%make-void-port "rw+"))) (zero? (system* "tar" "--wildcards" "--list" pattern "-f" tarball)))) (or (check "*.f90") (check "*.f95") (check "*.f"))) (define (tarball-files-match-pattern? tarball regexp . file-patterns) "Return #T if any of the files represented by FILE-PATTERNS in the TARBALL match the given REGEXP." (call-with-temporary-directory (lambda (dir) (let ((pattern (make-regexp regexp))) (parameterize ((current-error-port (%make-void-port "rw+"))) (apply system* "tar" "xf" tarball "-C" dir `("--wildcards" ,@file-patterns))) (any (lambda (file) (call-with-input-file file (lambda (port) (let loop () (let ((line (read-line port))) (cond ((eof-object? line) #f) ((regexp-exec pattern line) #t) (else (loop)))))))) (find-files dir)))))) (define (needs-zlib? tarball) "Return #T if any of the Makevars files in the src directory of the TARBALL contain a zlib linker flag." (tarball-files-match-pattern? tarball "-lz" "*/src/Makevars*" "*/src/configure*" "*/configure*")) (define (needs-pkg-config? tarball) "Return #T if any of the Makevars files in the src directory of the TARBALL reference the pkg-config tool." (tarball-files-match-pattern? tarball "pkg-config" "*/src/Makevars*" "*/src/configure*" "*/configure*")) (define (description->package repository meta) "Return the `package' s-expression for an R package published on REPOSITORY from the alist META, which was derived from the R package's DESCRIPTION file." (let* ((base-url (case repository ((cran) %cran-url) ((bioconductor) %bioconductor-url))) (uri-helper (case repository ((cran) cran-uri) ((bioconductor) bioconductor-uri))) (name (assoc-ref meta "Package")) (synopsis (assoc-ref meta "Title")) (version (assoc-ref meta "Version")) (license (string->license (assoc-ref meta "License"))) ;; Some packages have multiple home pages. Some have none. (home-page (match (listify meta "URL") ((url rest ...) url) (_ (string-append base-url name)))) (source-url (match (uri-helper name version) ((url rest ...) url) ((? string? url) url) (_ #f))) (tarball (with-store store (download-to-store store source-url))) (sysdepends (append (if (needs-zlib? tarball) '("zlib") '()) (map string-downcase (listify meta "SystemRequirements")))) (propagate (filter (lambda (name) (not (member name default-r-packages))) (lset-union equal? (listify meta "Imports") (listify meta "LinkingTo") (delete "R" (listify meta "Depends")))))) (values `(package (name ,(guix-name name)) (version ,version) (source (origin (method url-fetch) (uri (,(procedure-name uri-helper) ,name version)) (sha256 (base32 ,(bytevector->nix-base32-string (file-sha256 tarball)))))) ,@(if (not (equal? (string-append "r-" name) (guix-name name))) `((properties ,`(,'quasiquote ((,'upstream-name . ,name))))) '()) (build-system r-build-system) ,@(maybe-inputs sysdepends) ,@(maybe-inputs (map guix-name propagate) 'propagated-inputs) ,@(maybe-inputs `(,@(if (needs-fortran? tarball) '("gfortran") '()) ,@(if (needs-pkg-config? tarball) '("pkg-config") '())) 'native-inputs) (home-page ,(if (string-null? home-page) (string-append base-url name) home-page)) (synopsis ,synopsis) (description ,(beautify-description (or (assoc-ref meta "Description") ""))) (license ,license)) propagate))) (define cran->guix-package (memoize (lambda* (package-name #:optional (repo 'cran)) "Fetch the metadata for PACKAGE-NAME from REPO and return the `package' s-expression corresponding to that package, or #f on failure." (and=> (fetch-description repo package-name) (cut description->package repo <>))))) (define* (recursive-import package-name #:optional (repo 'cran)) "Generate a stream of package expressions for PACKAGE-NAME and all its dependencies." (receive (package . dependencies) (cran->guix-package package-name repo) (if (not package) stream-null ;; Generate a lazy stream of package expressions for all unknown ;; dependencies in the graph. (let* ((make-state (lambda (queue done) (cons queue done))) (next (match-lambda (((next . rest) . done) next))) (imported (match-lambda ((queue . done) done))) (done? (match-lambda ((queue . done) (zero? (length queue))))) (unknown? (lambda* (dependency #:optional (done '())) (and (not (member dependency done)) (null? (find-packages-by-name (guix-name dependency)))))) (update (lambda (state new-queue) (match state (((head . tail) . done) (make-state (lset-difference equal? (lset-union equal? new-queue tail) done) (cons head done))))))) (stream-cons package (stream-unfold ;; map: produce a stream element (lambda (state) (cran->guix-package (next state) repo)) ;; predicate (negate done?) ;; generator: update the queue (lambda (state) (receive (package . dependencies) (cran->guix-package (next state) repo) (if package (update state (filter (cut unknown? <> (cons (next state) (imported state))) (car dependencies))) ;; TODO: Try the other archives before giving up (update state (imported state))))) ;; initial state (make-state (filter unknown? (car dependencies)) (list package-name)))))))) ;;; ;;; Updater. ;;; (define (package->upstream-name package) "Return the upstream name of the PACKAGE." (let* ((properties (package-properties package)) (upstream-name (and=> properties (cut assoc-ref <> 'upstream-name)))) (if upstream-name upstream-name (match (package-source package) ((? origin? origin) (match (origin-uri origin) ((or (? string? url) (url _ ...)) (let ((end (string-rindex url #\_)) (start (string-rindex url #\/))) ;; The URL ends on ;; (string-append "/" name "_" version ".tar.gz") (and start end (substring url (+ start 1) end)))) (_ #f))) (_ #f))))) (define (latest-cran-release package) "Return an <upstream-source> for the latest release of PACKAGE." (define upstream-name (package->upstream-name package)) (define meta (fetch-description 'cran upstream-name)) (and meta (let ((version (assoc-ref meta "Version"))) ;; CRAN does not provide signatures. (upstream-source (package (package-name package)) (version version) (urls (cran-uri upstream-name version)))))) (define (latest-bioconductor-release package) "Return an <upstream-source> for the latest release of PACKAGE." (define upstream-name (package->upstream-name package)) (define version (latest-bioconductor-package-version upstream-name)) (and version ;; Bioconductor does not provide signatures. (upstream-source (package (package-name package)) (version version) (urls (bioconductor-uri upstream-name version))))) (define (cran-package? package) "Return true if PACKAGE is an R package from CRAN." (and (string-prefix? "r-" (package-name package)) ;; Check if the upstream name can be extracted from package uri. (package->upstream-name package) ;; Check if package uri(s) are prefixed by "mirror://cran". (match (and=> (package-source package) origin-uri) ((? string? uri) (string-prefix? "mirror://cran" uri)) ((? list? uris) (any (cut string-prefix? "mirror://cran" <>) uris)) (_ #f)))) (define (bioconductor-package? package) "Return true if PACKAGE is an R package from Bioconductor." (let ((predicate (lambda (uri) (and (string-prefix? "https://bioconductor.org" uri) ;; Data packages are neither listed in SVN nor on ;; the Github mirror, so we have to exclude them ;; from the set of bioconductor packages that can be ;; updated automatically. (not (string-contains uri "/data/annotation/")) ;; Experiment packages are in a separate repository. (not (string-contains uri "/data/experiment/")))))) (and (string-prefix? "r-" (package-name package)) (match (and=> (package-source package) origin-uri) ((? string? uri) (predicate uri)) ((? list? uris) (any predicate uris)) (_ #f))))) (define (bioconductor-data-package? package) "Return true if PACKAGE is an R data package from Bioconductor." (let ((predicate (lambda (uri) (and (string-prefix? "https://bioconductor.org" uri) (string-contains uri "/data/annotation/"))))) (and (string-prefix? "r-" (package-name package)) (match (and=> (package-source package) origin-uri) ((? string? uri) (predicate uri)) ((? list? uris) (any predicate uris)) (_ #f))))) (define (bioconductor-experiment-package? package) "Return true if PACKAGE is an R experiment package from Bioconductor." (let ((predicate (lambda (uri) (and (string-prefix? "https://bioconductor.org" uri) (string-contains uri "/data/experiment/"))))) (and (string-prefix? "r-" (package-name package)) (match (and=> (package-source package) origin-uri) ((? string? uri) (predicate uri)) ((? list? uris) (any predicate uris)) (_ #f))))) (define %cran-updater (upstream-updater (name 'cran) (description "Updater for CRAN packages") (pred cran-package?) (latest latest-cran-release))) (define %bioconductor-updater (upstream-updater (name 'bioconductor) (description "Updater for Bioconductor packages") (pred bioconductor-package?) (latest latest-bioconductor-release))) ;;; cran.scm ends here