;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 Federico Beffa ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (guix import hackage) #:use-module (ice-9 match) #:use-module (ice-9 regex) #:use-module (ice-9 rdelim) #:use-module (ice-9 receive) #:use-module (ice-9 pretty-print) #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) #:use-module (srfi srfi-11) #:use-module (srfi srfi-1) #:use-module ((guix download) #:select (download-to-store)) #:use-module ((guix utils) #:select (package-name->name+version)) #:use-module (guix import utils) #:use-module (guix store) #:use-module (guix hash) #:use-module (guix base32) #:use-module ((guix utils) #:select (call-with-temporary-output-file)) #:export (hackage->guix-package)) ;; Part 1: ;; ;; Functions used to read a Cabal file. (define ghc-standard-libraries ;; List of libraries distributed with ghc (7.8.4). We include GHC itself as ;; some packages list it. '("ghc" "haskell98" "hoopl" "base" "transformers" "deepseq" "array" "binary" "bytestring" "containers" "time" "cabal" "bin-package-db" "ghc-prim" "integer-gmp" "integer-simple" "win32" "template-haskell" "process" "haskeline" "terminfo" "directory" "filepath" "old-locale" "unix" "old-time" "pretty" "xhtml" "hpc")) (define package-name-prefix "ghc-") (define key-value-rx ;; Regular expression matching "key: value" (make-regexp "([a-zA-Z0-9-]+):[ \t]*(\\w?.*)$")) (define sections-rx ;; Regular expression matching a section "head sub-head ..." (make-regexp "([a-zA-Z0-9\\(\\)-]+)")) (define comment-rx ;; Regexp matching Cabal comment lines. (make-regexp "^ *--")) (define (has-key? line) "Check if LINE includes a key." (regexp-exec key-value-rx line)) (define (comment-line? line) "Check if LINE is a comment line." (regexp-exec comment-rx line)) (define (line-indentation+rest line) "Returns two results: The number of indentation spaces and the rest of the line (without indentation)." (let loop ((line-lst (string->list line)) (count 0)) ;; Sometimes values are spread over multiple lines and new lines start ;; with a comma ',' with the wrong indentation. See e.g. haddock-api. (if (or (null? line-lst) (not (or (eqv? (first line-lst) #\space) (eqv? (first line-lst) #\,) ; see, e.g., haddock-api.cabal (eqv? (first line-lst) #\tab)))) (values count (list->string line-lst)) (loop (cdr line-lst) (+ count 1))))) (define (multi-line-value lines seed) "Function to read a value split across multiple lines. LINES are the remaining input lines to be read. SEED is the value read on the same line as the key. Return two values: A list with values and the remaining lines to be processed." (define (multi-line-value-with-min-indent lines seed min-indent) (if (null? lines) (values '() '()) (let-values (((current-indent value) (line-indentation+rest (first lines))) ((next-line-indent next-line-value) (if (null? (cdr lines)) (values #f "") (line-indentation+rest (second lines))))) (if (or (not next-line-indent) (< next-line-indent min-indent) (regexp-exec condition-rx next-line-value)) (values (reverse (cons value seed)) (cdr lines)) (multi-line-value-with-min-indent (cdr lines) (cons value seed) min-indent))))) (let-values (((current-indent value) (line-indentation+rest (first lines)))) (multi-line-value-with-min-indent lines seed current-indent))) (define (read-cabal port) "Parses a Cabal file from PORT. Return a list of list pairs: (((head1 sub-head1 ... key1) (value)) ((head2 sub-head2 ... key2) (value2)) ...). We try do deduce the Cabal format from the following document: https://www.haskell.org/cabal/users-guide/developing-packages.html Keys are case-insensitive. We therefore lowercase them. Values are case-sensitive. Currently only indentation-structured files are parsed. Braces structured files are not handled." ;" <- make emacs happy. (define (read-and-trim-line port) (let ((line (read-line port))) (if (string? line) (string-trim-both line #\return) line))) (define (strip-insignificant-lines port) (let loop ((line (read-and-trim-line port)) (result '())) (cond ((eof-object? line) (reverse result)) ((or (string-null? line) (comment-line? line)) (loop (read-and-trim-line port) result)) (else (loop (read-and-trim-line port) (cons line result)))))) (let loop ((lines (strip-insignificant-lines port)) (indents '()) ; only includes indents at start of section heads. (sections '()) (result '())) (let-values (((current-indent line) (if (null? lines) (values 0 "") (line-indentation+rest (first lines)))) ((next-line-indent next-line) (if (or (null? lines) (null? (cdr lines))) (values 0 "") (line-indentation+rest (second lines))))) (if (null? lines) (reverse result) (let ((rx-result (has-key? line))) (cond (rx-result (let ((key (string-downcase (match:substring rx-result 1))) (value (match:substring rx-result 2))) (cond ;; Simple single line "key: value". ((= next-line-indent current-indent) (loop (cdr lines) indents sections (cons (list (reverse (cons key sections)) (list value)) result))) ;; Multi line "key: value\n value cont...". ((> next-line-indent current-indent) (let*-values (((value-lst lines) (multi-line-value (cdr lines) (if (string-null? value) '() `(,value))))) ;; multi-line-value returns to the first line after the ;; multi-value. (loop lines indents sections (cons (list (reverse (cons key sections)) value-lst) result)))) ;; Section ended. (else ;; Indentation is reduced. Check by how many levels. (let* ((idx (and=> (list-index (lambda (x) (= next-line-indent x)) indents) (cut + <> (if (has-key? next-line) 1 0)))) (sec (if idx (drop sections idx) (raise (condition (&message (message "unable to parse Cabal file")))))) (ind (drop indents idx))) (loop (cdr lines) ind sec (cons (list (reverse (cons key sections)) (list value)) result))))))) ;; Start of a new section. ((or (null? indents) (> current-indent (first indents))) (loop (cdr lines) (cons current-indent indents) (cons (string-downcase line) sections) result)) (else (loop (cdr lines) indents (cons (string-downcase line) (cdr sections)) result)))))))) (define condition-rx ;; Regexp for conditionals. (make-regexp "^if +(.*)$")) (define (split-section section) "Split SECTION in individual words with exception for the predicate of an 'if' conditional." (let ((rx-result (regexp-exec condition-rx section))) (if rx-result `("if" ,(match:substring rx-result 1)) (map match:substring (list-matches sections-rx section))))) (define (join-sections sec1 sec2) (fold-right cons sec2 sec1)) (define (pre-process-keys key) (match key (() '()) ((sec1 rest ...) (join-sections (split-section sec1) (pre-process-keys rest))))) (define (pre-process-entry-keys entry) (match entry ((key value) (list (pre-process-keys key) value)) (() '()))) (define (pre-process-entries-keys entries) "ENTRIES is a list of list pairs, a keys list and a valules list, as produced by 'read-cabal'. Split each element of the keys list into individual words. This pre-processing is used to read flags." (match entries ((entry rest ...) (cons (pre-process-entry-keys entry) (pre-process-entries-keys rest))) (() '()))) (define (get-flags pre-processed-entries) "PRE-PROCESSED-ENTRIES is a list of list pairs, a keys list and a values list, as produced by 'read-cabal' and pre-processed by 'pre-process-entries-keys'. Return a list of pairs with the name of flags and their default value (one of \"False\" or \"True\") as specified in the Cabal file: ((\"flag1-name\" . \"False-or-True\") ...)." ;" <- make emacs happy (match pre-processed-entries (() '()) (((("flag" flag-name "default") (flag-val)) rest ...) (cons (cons flag-name flag-val) (get-flags rest))) ((entry rest ... ) (get-flags rest)) (_ #f))) ;; Part 2: ;; ;; Functions to read information from the Cabal object created by 'read-cabal' ;; and convert Cabal format dependencies conditionals into equivalent ;; S-expressions. (define tests-rx ;; Cabal test keywords (make-regexp "(os|arch|flag|impl) *\\(([ a-zA-Z0-9_.<>=-]+)\\)")) (define parens-rx ;; Parentheses within conditions (make-regexp "\\((.+)\\)")) (define or-rx ;; OR operator in conditions (make-regexp " +\\|\\| +")) (define and-rx ;; AND operator in conditions (make-regexp " +&& +")) (define not-rx ;; NOT operator in conditions (make-regexp "^!.+")) (define (bi-op-args str match-lst) "Return a list with the arguments of (logic) bianry operators. MATCH-LST is the result of 'list-match' against a binary operator regexp on STR." (let ((operators (length match-lst))) (map (lambda (from to) (substring str from to)) (cons 0 (map match:end match-lst)) (append (map match:start match-lst) (list (string-length str)))))) (define (bi-op->sexp-like bi-op args) "BI-OP is a string with the name of a Scheme operator which in a Cabal file is represented by a binary operator. ARGS are the arguments of said operator. Return a string representing an S-expression of the operator applied to its arguments." (if (= (length args) 1) (first args) (string-append "(" bi-op (fold (lambda (arg seed) (string-append seed " " arg)) "" args) ")"))) (define (not->sexp-like arg) "If the string ARG is prefixed by a Cabal negation operator, convert it to an equivalent Scheme S-expression string." (if (regexp-exec not-rx arg) (string-append "(not " (substring arg 1 (string-length arg)) ")") arg)) (define (parens-less-cond->sexp-like conditional) "Convert a Cabal CONDITIONAL string into a string with equivalent Scheme syntax. This procedure accepts only simple conditionals without parentheses." ;; The outher operation is the one with the lowest priority: OR (bi-op->sexp-like "or" ;; each OR argument may be an AND operation (map (lambda (or-arg) (let ((m-lst (list-matches and-rx or-arg))) ;; is there an AND operation? (if (> (length m-lst) 0) (bi-op->sexp-like "and" ;; expand NOT operators when there are ANDs (map not->sexp-like (bi-op-args or-arg m-lst))) ;; ... and when there aren't. (not->sexp-like or-arg)))) ;; list of OR arguments (bi-op-args conditional (list-matches or-rx conditional))))) (define test-keyword-ornament "__") (define (conditional->sexp-like conditional) "Convert a Cabal CONDITIONAL string into a string with equivalent Scheme syntax." ;; First we substitute TEST-KEYWORD-ORNAMENT for parentheses around tests ;; keywords so that parentheses are only used to set precedences. This ;; substantially simplify parsing. (let ((conditional (regexp-substitute/global #f tests-rx conditional 'pre 1 test-keyword-ornament 2 test-keyword-ornament 'post))) (let loop ((sub-cond conditional)) (let ((rx-result (regexp-exec parens-rx sub-cond))) (cond (rx-result (parens-less-cond->sexp-like (string-append (match:prefix rx-result) (loop (match:substring rx-result 1)) (match:suffix rx-result)))) (else (parens-less-cond->sexp-like sub-cond))))))) (define (eval-flags sexp-like-cond flags) "SEXP-LIKE-COND is a string representing an S-expression conditional. FLAGS is a list of flag name and value pairs as produced by 'get-flags'. Substitute \"#t\" or \"#f\" according to the value of flags. (Default to \"True\")." (fold-right (lambda (flag sexp) (match flag ((name . value) (let ((rx (make-regexp (string-append "flag" test-keyword-ornament name test-keyword-ornament)))) (regexp-substitute/global #f rx sexp 'pre (if (string-ci= value "False") "#f" "#t") 'post))) (_ sexp))) sexp-like-cond (cons '("[a-zA-Z0-9_-]+" . "True") flags))) (define (eval-tests->sexp sexp-like-cond) "In the string SEXP-LIKE-COND substitute test keywords \"os(...)\" and \"arch(...)\" with equivalent Scheme checks. Retrun an S-expression." (with-input-from-string (fold-right (lambda (test sexp) (match test ((type pre-match post-match) (let ((rx (make-regexp (string-append type test-keyword-ornament "(\\w+)" test-keyword-ornament)))) (regexp-substitute/global #f rx sexp 'pre pre-match 2 post-match 'post))) (_ sexp))) sexp-like-cond ;; (%current-system) returns, e.g., "x86_64-linux" or "i686-linux". '(("(os|arch)" "(string-match \"" "\" (%current-system))"))) read)) (define (eval-impl sexp-like-cond) "Check for the Cabal test \"impl(...)\" in the string SEXP-LIKE-COND. Assume the module declaring the generated package includes a local variable called \"haskell-implementation\" with a string value of the form NAME-VERSION against which we compare." (with-output-to-string (lambda () (write (with-input-from-string (fold-right (lambda (test sexp) (match test ((pre-match post-match) (let ((rx-with-version (make-regexp (string-append "impl" test-keyword-ornament "([a-zA-Z0-9_-]+) *([<>=]+) *([0-9.]+) *" test-keyword-ornament))) (rx-without-version (make-regexp (string-append "impl" test-keyword-ornament "(\\w+)" test-keyword-ornament)))) (if (regexp-exec rx-with-version sexp) (regexp-substitute/global #f rx-with-version sexp 'pre pre-match 2 " " post-match " \"" 1 "-" 3 "\")" 'post) (regexp-substitute/global #f rx-without-version sexp 'pre pre-match "-match \"" 1 "\" " post-match ")" 'post)))) (_ sexp))) sexp-like-cond '(("(string" "haskell-implementation"))) read))))) (define (eval-cabal-keywords sexp-like-cond flags) ((compose eval-tests->sexp eval-impl (cut eval-flags <> flags)) sexp-like-cond)) (define (key->values meta key) "META is the representation of a Cabal file as produced by 'read-cabal'. Return the list of values associated with a specific KEY (a string)." (match meta (() '()) (((((? (lambda(x) (equal? x key)))) v) r ...) v) (((k v) r ...) (key->values (cdr meta) key)) (_ "key Not fount"))) (define (key-start-end->entries meta key-start-rx key-end-rx) "META is the representation of a Cabal file as produced by 'read-cabal'. Return all entries whose keys list starts with KEY-START and ends with KEY-END." (let ((pred (lambda (x) (and (regexp-exec key-start-rx (first x)) (regexp-exec key-end-rx (last x)))))) ;; (equal? (list key-start key-end) (list (first x) (last x)))))) (match meta (() '()) ((((? pred k) v) r ...) (cons `(,k ,v) (key-start-end->entries (cdr meta) key-start-rx key-end-rx))) (((k v) r ...) (key-start-end->entries (cdr meta) key-start-rx key-end-rx)) (_ "key Not fount")))) (define else-rx (make-regexp "^else$")) (define (count-if-else rx-result-ls) (apply + (map (lambda (m) (if m 1 0)) rx-result-ls))) (define (analyze-entry-cond entry) (let* ((keys (first entry)) (vals (second entry)) (rx-cond-result (map (cut regexp-exec condition-rx <>) keys)) (rx-else-result (map (cut regexp-exec else-rx <>) keys)) (cond-no (count-if-else rx-cond-result)) (else-no (count-if-else rx-else-result)) (cond-idx (list-index (lambda (rx) (if rx #t #f)) rx-cond-result)) (else-idx (list-index (lambda (rx) (if rx #t #f)) rx-else-result)) (key-cond (cond ((or (and cond-idx else-idx (< cond-idx else-idx)) (and cond-idx (not else-idx))) (match:substring (receive (head tail) (split-at rx-cond-result cond-idx) (first tail)))) ((or (and cond-idx else-idx (> cond-idx else-idx)) (and (not cond-idx) else-idx)) (match:substring (receive (head tail) (split-at rx-else-result else-idx) (first tail)))) (else "")))) (values keys vals rx-cond-result rx-else-result cond-no else-no key-cond))) (define (remove-cond entry cond) (match entry ((k v) (list (cdr (member cond k)) v)))) (define (group-and-reduce-level entries group group-cond) (let loop ((true-group group) (false-group '()) (entries entries)) (if (null? entries) (values (reverse true-group) (reverse false-group) entries) (let*-values (((entry) (first entries)) ((keys vals rx-cond-result rx-else-result cond-no else-no key-cond) (analyze-entry-cond entry))) (cond ((and (>= (+ cond-no else-no) 1) (string= group-cond key-cond)) (loop (cons (remove-cond entry group-cond) true-group) false-group (cdr entries))) ((and (>= (+ cond-no else-no) 1) (string= key-cond "else")) (loop true-group (cons (remove-cond entry "else") false-group) (cdr entries))) (else (values (reverse true-group) (reverse false-group) entries))))))) (define dependencies-rx (make-regexp "([a-zA-Z0-9_-]+) *[^,]*,?")) (define (hackage-name->package-name name) (if (string-prefix? package-name-prefix name) (string-downcase name) (string-append package-name-prefix (string-downcase name)))) (define (split-and-filter-dependencies ls names-to-filter) "Split the comma separated list of dependencies LS coming from the Cabal file, filter packages included in NAMES-TO-FILTER and return a list with inputs suitable for the Guix package. Currently the version information is discarded." (define (split-at-comma-and-filter d) (fold (lambda (m seed) (let* ((name (string-downcase (match:substring m 1))) (pkg-name (hackage-name->package-name name))) (if (member name names-to-filter) seed (cons (list pkg-name (list 'unquote (string->symbol pkg-name))) seed)))) '() (list-matches dependencies-rx d))) (fold (lambda (d p) (append (split-at-comma-and-filter d) p)) '() ls)) (define* (dependencies-cond->sexp meta #:key (include-test-dependencies? #t)) "META is the representation of a Cabal file as produced by 'read-cabal'. Return an S-expression containing the list of dependencies as expected by the 'inputs' field of a package. The generated S-expressions may include conditionals as defined in the cabal file. During this process we discard the version information of the packages." (define (take-dependencies meta) (let ((key-start-exe (make-regexp "executable")) (key-start-lib (make-regexp "library")) (key-start-tests (make-regexp "test-suite")) (key-end (make-regexp "build-depends"))) (append (key-start-end->entries meta key-start-exe key-end) (key-start-end->entries meta key-start-lib key-end) (if include-test-dependencies? (key-start-end->entries meta key-start-tests key-end) '())))) (let ((flags (get-flags (pre-process-entries-keys meta))) (augmented-ghc-std-libs (append (key->values meta "name") ghc-standard-libraries))) (delete-duplicates (let loop ((entries (take-dependencies meta)) (result '())) (if (null? entries) (reverse result) (let*-values (((entry) (first entries)) ((keys vals rx-cond-result rx-else-result cond-no else-no key-cond) (analyze-entry-cond entry))) (cond ((= (+ cond-no else-no) 0) (loop (cdr entries) (append (split-and-filter-dependencies vals augmented-ghc-std-libs) result))) (else (let-values (((true-group false-group entries) (group-and-reduce-level entries '() key-cond)) ((cond-final) (eval-cabal-keywords (conditional->sexp-like (last (split-section key-cond))) flags))) (loop entries (cond ((or (eq? cond-final #t) (equal? cond-final '(not #f))) (append (loop true-group '()) result)) ((or (eq? cond-final #f) (equal? cond-final '(not #t))) (append (loop false-group '()) result)) (else (let ((true-group-result (loop true-group '())) (false-group-result (loop false-group '()))) (cond ((and (null? true-group-result) (null? false-group-result)) result) ((null? false-group-result) (cons `(unquote-splicing (when ,cond-final ,true-group-result)) result)) ((null? true-group-result) (cons `(unquote-splicing (unless ,cond-final ,false-group-result)) result)) (else (cons `(unquote-splicing (if ,cond-final ,true-group-result ,false-group-result)) result)))))))))))))))) ;; Part 3: ;; ;; Retrive the desired package and its Cabal file from ;; http://hackage.haskell.org and construct the Guix package S-expression. (define (hackage-fetch name-version) "Return the Cabal file for the package NAME-VERSION, or #f on failure. If the version part is omitted from the package name, then return the latest version." (let*-values (((name version) (package-name->name+version name-version)) ((url) (if version (string-append "http://hackage.haskell.org/package/" name "-" version "/" name ".cabal") (string-append "http://hackage.haskell.org/package/" name "/" name ".cabal")))) (call-with-temporary-output-file (lambda (temp port) (and (url-fetch url temp) (call-with-input-file temp read-cabal)))))) (define string->license ;; List of valid values from ;; https://www.haskell.org ;; /cabal/release/cabal-latest/doc/API/Cabal/Distribution-License.html. (match-lambda ("GPL-2" 'gpl2) ("GPL-3" 'gpl3) ("GPL" "'gpl??") ("AGPL-3" 'agpl3) ("AGPL" "'agpl??") ("LGPL-2.1" 'lgpl2.1) ("LGPL-3" 'lgpl3) ("LGPL" "'lgpl??") ("BSD2" 'bsd-2) ("BSD3" 'bsd-3) ("MIT" 'expat) ("ISC" 'isc) ("MPL" 'mpl2.0) ("Apache-2.0" 'asl2.0) ((x) (string->license x)) ((lst ...) `(list ,@(map string->license lst))) (_ #f))) (define* (hackage-module->sexp meta #:key (include-test-dependencies? #t)) "Return the `package' S-expression for a Cabal package. META is the representation of a Cabal file as produced by 'read-cabal'." (define name (first (key->values meta "name"))) (define version (first (key->values meta "version"))) (define description (let*-values (((description) (key->values meta "description")) ((lines last) (split-at description (- (length description) 1)))) (fold-right (lambda (line seed) (string-append line "\n" seed)) (first last) lines))) (define source-url (string-append "http://hackage.haskell.org/package/" name "/" name "-" version ".tar.gz")) ;; Several packages do not have an official home-page other than on Hackage. (define home-page (let ((home-page-entry (key->values meta "homepage"))) (if (null? home-page-entry) (string-append "http://hackage.haskell.org/package/" name) (first home-page-entry)))) (define (maybe-inputs input-type inputs) (match inputs (() '()) ((inputs ...) (list (list input-type (list 'quasiquote inputs)))))) (let ((tarball (with-store store (download-to-store store source-url)))) `(package (name ,(hackage-name->package-name name)) (version ,version) (source (origin (method url-fetch) (uri (string-append ,@(factorize-uri source-url version))) (sha256 (base32 ,(if tarball (bytevector->nix-base32-string (file-sha256 tarball)) "failed to download tar archive"))))) (build-system haskell-build-system) ,@(maybe-inputs 'inputs (dependencies-cond->sexp meta #:include-test-dependencies? include-test-dependencies?)) (home-page ,home-page) (synopsis ,@(key->values meta "synopsis")) (description ,description) (license ,(string->license (key->values meta "license")))))) (define* (hackage->guix-package module-name #:key (include-test-dependencies? #t)) "Fetch the Cabal file for PACKAGE-NAME from hackage.haskell.org, and return the `package' S-expression corresponding to that package, or #f on failure." (let ((module-meta (hackage-fetch module-name))) (and=> module-meta (cut hackage-module->sexp <> #:include-test-dependencies? include-test-dependencies?)))) ;;; cabal.scm ends here