Add (guix swh).
* guix/swh.scm: New file. * Makefile.am (MODULES): Add it.
This commit is contained in:
parent
3b5829bbcb
commit
de2bfe9029
|
@ -75,6 +75,7 @@ MODULES = \
|
|||
guix/discovery.scm \
|
||||
guix/git-download.scm \
|
||||
guix/hg-download.scm \
|
||||
guix/swh.scm \
|
||||
guix/monads.scm \
|
||||
guix/monad-repl.scm \
|
||||
guix/gexp.scm \
|
||||
|
|
|
@ -0,0 +1,551 @@
|
|||
;;; GNU Guix --- Functional package management for GNU
|
||||
;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
|
||||
;;;
|
||||
;;; This file is part of GNU Guix.
|
||||
;;;
|
||||
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the GNU General Public License as published by
|
||||
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||
;;; your option) any later version.
|
||||
;;;
|
||||
;;; GNU Guix is distributed in the hope that it will be useful, but
|
||||
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU General Public License
|
||||
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define-module (guix swh)
|
||||
#:use-module (guix base16)
|
||||
#:use-module (guix build utils)
|
||||
#:use-module ((guix build syscalls) #:select (mkdtemp!))
|
||||
#:use-module (web client)
|
||||
#:use-module (web response)
|
||||
#:use-module (json)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-9)
|
||||
#:use-module (srfi srfi-11)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 regex)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module ((ice-9 ftw) #:select (scandir))
|
||||
#:export (origin?
|
||||
origin-id
|
||||
origin-type
|
||||
origin-url
|
||||
origin-visits
|
||||
lookup-origin
|
||||
|
||||
visit?
|
||||
visit-date
|
||||
visit-origin
|
||||
visit-url
|
||||
visit-snapshot-url
|
||||
visit-status
|
||||
visit-number
|
||||
visit-snapshot
|
||||
|
||||
branch?
|
||||
branch-name
|
||||
branch-target
|
||||
|
||||
release?
|
||||
release-id
|
||||
release-name
|
||||
release-message
|
||||
release-target
|
||||
|
||||
revision?
|
||||
revision-id
|
||||
revision-date
|
||||
revision-directory
|
||||
lookup-revision
|
||||
lookup-origin-revision
|
||||
|
||||
content?
|
||||
content-checksums
|
||||
content-data-url
|
||||
content-length
|
||||
lookup-content
|
||||
|
||||
directory-entry?
|
||||
directory-entry-name
|
||||
directory-entry-type
|
||||
directory-entry-checksums
|
||||
directory-entry-length
|
||||
directory-entry-permissions
|
||||
lookup-directory
|
||||
directory-entry-target
|
||||
|
||||
vault-reply?
|
||||
vault-reply-id
|
||||
vault-reply-fetch-url
|
||||
vault-reply-object-id
|
||||
vault-reply-object-type
|
||||
vault-reply-progress-message
|
||||
vault-reply-status
|
||||
query-vault
|
||||
request-cooking
|
||||
vault-fetch
|
||||
|
||||
swh-download))
|
||||
|
||||
;;; Commentary:
|
||||
;;;
|
||||
;;; This module provides bindings to the HTTP interface of Software Heritage.
|
||||
;;; It allows you to browse the archive, look up revisions (such as SHA1
|
||||
;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
|
||||
;;; <https://archive.softwareheritage.org/api/> for more information.
|
||||
;;;
|
||||
;;; The high-level 'swh-download' procedure allows you to download a Git
|
||||
;;; revision from Software Heritage, provided it is available.
|
||||
;;;
|
||||
;;; Code:
|
||||
|
||||
(define %swh-base-url
|
||||
;; Presumably we won't need to change it.
|
||||
"https://archive.softwareheritage.org")
|
||||
|
||||
(define (swh-url path . rest)
|
||||
(define url
|
||||
(string-append %swh-base-url path
|
||||
(string-join rest "/" 'prefix)))
|
||||
|
||||
;; Ensure there's a trailing slash or we get a redirect.
|
||||
(if (string-suffix? "/" url)
|
||||
url
|
||||
(string-append url "/")))
|
||||
|
||||
(define-syntax-rule (define-json-reader json->record ctor spec ...)
|
||||
"Define JSON->RECORD as a procedure that converts a JSON representation,
|
||||
read from a port, string, or hash table, into a record created by CTOR and
|
||||
following SPEC, a series of field specifications."
|
||||
(define (json->record input)
|
||||
(let ((table (cond ((port? input)
|
||||
(json->scm input))
|
||||
((string? input)
|
||||
(json-string->scm input))
|
||||
((hash-table? input)
|
||||
input))))
|
||||
(let-syntax ((extract-field (syntax-rules ()
|
||||
((_ table (field key json->value))
|
||||
(json->value (hash-ref table key)))
|
||||
((_ table (field key))
|
||||
(hash-ref table key))
|
||||
((_ table (field))
|
||||
(hash-ref table
|
||||
(symbol->string 'field))))))
|
||||
(ctor (extract-field table spec) ...)))))
|
||||
|
||||
(define-syntax-rule (define-json-mapping rtd ctor pred json->record
|
||||
(field getter spec ...) ...)
|
||||
"Define RTD as a record type with the given FIELDs and GETTERs, à la SRFI-9,
|
||||
and define JSON->RECORD as a conversion from JSON to a record of this type."
|
||||
(begin
|
||||
(define-record-type rtd
|
||||
(ctor field ...)
|
||||
pred
|
||||
(field getter) ...)
|
||||
|
||||
(define-json-reader json->record ctor
|
||||
(field spec ...) ...)))
|
||||
|
||||
(define %date-regexp
|
||||
;; Match strings like "2014-11-17T22:09:38+01:00" or
|
||||
;; "2018-09-30T23:20:07.815449+00:00"".
|
||||
(make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))
|
||||
|
||||
(define (string->date* str)
|
||||
"Return a SRFI-19 date parsed from STR, a date string as returned by
|
||||
Software Heritage."
|
||||
;; We can't use 'string->date' because of the timezone format: SWH returns
|
||||
;; "+01:00" when the '~z' template expects "+0100". So we roll our own!
|
||||
(or (and=> (regexp-exec %date-regexp str)
|
||||
(lambda (match)
|
||||
(define (ref n)
|
||||
(string->number (match:substring match n)))
|
||||
|
||||
(make-date (let ((ns (match:substring match 8)))
|
||||
(if ns
|
||||
(string->number (string-drop ns 1))
|
||||
0))
|
||||
(ref 6) (ref 5) (ref 4)
|
||||
(ref 3) (ref 2) (ref 1)
|
||||
(+ (* 3600 (ref 9)) ;time zone
|
||||
(if (< (ref 9) 0)
|
||||
(- (ref 10))
|
||||
(ref 10))))))
|
||||
str)) ;oops!
|
||||
|
||||
(define* (call url decode #:optional (method http-get)
|
||||
#:key (false-if-404? #t))
|
||||
"Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
|
||||
using DECODE, a one-argument procedure that takes an input port. When
|
||||
FALSE-IF-404? is true, return #f upon 404 responses."
|
||||
(let*-values (((response port)
|
||||
(method url #:streaming? #t)))
|
||||
;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
|
||||
(match (assq-ref (response-headers response) 'x-ratelimit-remaining)
|
||||
(#f #t)
|
||||
((? (compose zero? string->number))
|
||||
(throw 'swh-error url response))
|
||||
(_ #t))
|
||||
|
||||
(cond ((= 200 (response-code response))
|
||||
(let ((result (decode port)))
|
||||
(close-port port)
|
||||
result))
|
||||
((and false-if-404?
|
||||
(= 404 (response-code response)))
|
||||
(close-port port)
|
||||
#f)
|
||||
(else
|
||||
(close-port port)
|
||||
(throw 'swh-error url response)))))
|
||||
|
||||
(define-syntax define-query
|
||||
(syntax-rules (path)
|
||||
"Define a procedure that performs a Software Heritage query."
|
||||
((_ (name args ...) docstring (path components ...)
|
||||
json->value)
|
||||
(define (name args ...)
|
||||
docstring
|
||||
(call (swh-url components ...) json->value)))))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/origin/git/url/https://github.com/guix-mirror/guix/>
|
||||
(define-json-mapping <origin> make-origin origin?
|
||||
json->origin
|
||||
(id origin-id)
|
||||
(visits-url origin-visits-url "origin_visits_url")
|
||||
(type origin-type)
|
||||
(url origin-url))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
|
||||
(define-json-mapping <visit> make-visit visit?
|
||||
json->visit
|
||||
(date visit-date "date" string->date*)
|
||||
(origin visit-origin)
|
||||
(url visit-url "origin_visit_url")
|
||||
(snapshot-url visit-snapshot-url "snapshot_url")
|
||||
(status visit-status)
|
||||
(number visit-number "visit"))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
|
||||
(define-json-mapping <snapshot> make-snapshot snapshot?
|
||||
json->snapshot
|
||||
(branches snapshot-branches "branches" json->branches))
|
||||
|
||||
;; This is used for the "branches" field of snapshots.
|
||||
(define-record-type <branch>
|
||||
(make-branch name target-type target-url)
|
||||
branch?
|
||||
(name branch-name)
|
||||
(target-type branch-target-type) ;release | revision
|
||||
(target-url branch-target-url))
|
||||
|
||||
(define (json->branches branches)
|
||||
(hash-map->list (lambda (key value)
|
||||
(make-branch key
|
||||
(string->symbol
|
||||
(hash-ref value "target_type"))
|
||||
(hash-ref value "target_url")))
|
||||
branches))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
|
||||
(define-json-mapping <release> make-release release?
|
||||
json->release
|
||||
(id release-id)
|
||||
(name release-name)
|
||||
(message release-message)
|
||||
(target-type release-target-type "target_type" string->symbol)
|
||||
(target-url release-target-url "target_url"))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
|
||||
(define-json-mapping <revision> make-revision revision?
|
||||
json->revision
|
||||
(id revision-id)
|
||||
(date revision-date "date" string->date*)
|
||||
(directory revision-directory)
|
||||
(directory-url revision-directory-url "directory_url"))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/content/>
|
||||
(define-json-mapping <content> make-content content?
|
||||
json->content
|
||||
(checksums content-checksums "checksums" json->checksums)
|
||||
(data-url content-data-url "data_url")
|
||||
(file-type-url content-file-type-url "filetype_url")
|
||||
(language-url content-language-url "language_url")
|
||||
(length content-length)
|
||||
(license-url content-license-url "license_url"))
|
||||
|
||||
(define (json->checksums checksums)
|
||||
(hash-map->list (lambda (key value)
|
||||
(cons key (base16-string->bytevector value)))
|
||||
checksums))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
|
||||
(define-json-mapping <directory-entry> make-directory-entry directory-entry?
|
||||
json->directory-entry
|
||||
(name directory-entry-name)
|
||||
(type directory-entry-type "type"
|
||||
(match-lambda
|
||||
("dir" 'directory)
|
||||
(str (string->symbol str))))
|
||||
(checksums directory-entry-checksums "checksums"
|
||||
(match-lambda
|
||||
(#f #f)
|
||||
(lst (json->checksums lst))))
|
||||
(id directory-entry-id "dir_id")
|
||||
(length directory-entry-length)
|
||||
(permissions directory-entry-permissions "perms")
|
||||
(target-url directory-entry-target-url "target_url"))
|
||||
|
||||
;; <https://archive.softwareheritage.org/api/1/origin/save/>
|
||||
(define-json-mapping <save-reply> make-save-reply save-reply?
|
||||
json->save-reply
|
||||
(origin-url save-reply-origin-url "origin_url")
|
||||
(origin-type save-reply-origin-type "origin_type")
|
||||
(request-date save-reply-request-date "save_request_date"
|
||||
string->date*)
|
||||
(request-status save-reply-request-status "save_request_status"
|
||||
string->symbol)
|
||||
(task-status save-reply-task-status "save_task_status"
|
||||
(match-lambda
|
||||
("not created" 'not-created)
|
||||
((? string? str) (string->symbol str)))))
|
||||
|
||||
;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
|
||||
(define-json-mapping <vault-reply> make-vault-reply vault-reply?
|
||||
json->vault-reply
|
||||
(id vault-reply-id)
|
||||
(fetch-url vault-reply-fetch-url "fetch_url")
|
||||
(object-id vault-reply-object-id "obj_id")
|
||||
(object-type vault-reply-object-type "obj_type" string->symbol)
|
||||
(progress-message vault-reply-progress-message "progress_message")
|
||||
(status vault-reply-status "status" string->symbol))
|
||||
|
||||
|
||||
;;;
|
||||
;;; RPCs.
|
||||
;;;
|
||||
|
||||
(define-query (lookup-origin url)
|
||||
"Return an origin for URL."
|
||||
(path "/api/1/origin/git/url" url)
|
||||
json->origin)
|
||||
|
||||
(define-query (lookup-content hash type)
|
||||
"Return a content for HASH, of the given TYPE--e.g., \"sha256\"."
|
||||
(path "/api/1/content"
|
||||
(string-append type ":"
|
||||
(bytevector->base16-string hash)))
|
||||
json->content)
|
||||
|
||||
(define-query (lookup-revision id)
|
||||
"Return the revision with the given ID, typically a Git commit SHA1."
|
||||
(path "/api/1/revision" id)
|
||||
json->revision)
|
||||
|
||||
(define-query (lookup-directory id)
|
||||
"Return the directory with the given ID."
|
||||
(path "/api/1/directory" id)
|
||||
json->directory-entries)
|
||||
|
||||
(define (json->directory-entries port)
|
||||
(map json->directory-entry (json->scm port)))
|
||||
|
||||
(define (origin-visits origin)
|
||||
"Return the list of visits of ORIGIN, a record as returned by
|
||||
'lookup-origin'."
|
||||
(call (swh-url (origin-visits-url origin))
|
||||
(lambda (port)
|
||||
(map json->visit (json->scm port)))))
|
||||
|
||||
(define (visit-snapshot visit)
|
||||
"Return the snapshot corresponding to VISIT."
|
||||
(call (swh-url (visit-snapshot-url visit))
|
||||
json->snapshot))
|
||||
|
||||
(define (branch-target branch)
|
||||
"Return the target of BRANCH, either a <revision> or a <release>."
|
||||
(match (branch-target-type branch)
|
||||
('release
|
||||
(call (swh-url (branch-target-url branch))
|
||||
json->release))
|
||||
('revision
|
||||
(call (swh-url (branch-target-url branch))
|
||||
json->revision))))
|
||||
|
||||
(define (lookup-origin-revision url tag)
|
||||
"Return a <revision> corresponding to the given TAG for the repository
|
||||
coming from URL. Example:
|
||||
|
||||
(lookup-origin-release \"https://github.com/guix-mirror/guix/\" \"v0.8\")
|
||||
=> #<<revision> id: \"44941…\" …>
|
||||
|
||||
The information is based on the latest visit of URL available. Return #f if
|
||||
URL could not be found."
|
||||
(match (lookup-origin url)
|
||||
(#f #f)
|
||||
(origin
|
||||
(match (origin-visits origin)
|
||||
((visit . _)
|
||||
(let ((snapshot (visit-snapshot visit)))
|
||||
(match (and=> (find (lambda (branch)
|
||||
(string=? (string-append "refs/tags/" tag)
|
||||
(branch-name branch)))
|
||||
(snapshot-branches snapshot))
|
||||
branch-target)
|
||||
((? release? release)
|
||||
(release-target release))
|
||||
((? revision? revision)
|
||||
revision)
|
||||
(#f ;tag not found
|
||||
#f))))
|
||||
(()
|
||||
#f)))))
|
||||
|
||||
(define (release-target release)
|
||||
"Return the revision that is the target of RELEASE."
|
||||
(match (release-target-type release)
|
||||
('revision
|
||||
(call (swh-url (release-target-url release))
|
||||
json->revision))))
|
||||
|
||||
(define (directory-entry-target entry)
|
||||
"If ENTRY, a directory entry, has type 'directory, return its list of
|
||||
directory entries; if it has type 'file, return its <content> object."
|
||||
(call (swh-url (directory-entry-target-url entry))
|
||||
(match (directory-entry-type entry)
|
||||
('file json->content)
|
||||
('directory json->directory-entries))))
|
||||
|
||||
(define* (save-origin url #:optional (type "git"))
|
||||
"Request URL to be saved."
|
||||
(call (swh-url "/api/1/origin/save" type "url" url) json->save-reply
|
||||
http-post))
|
||||
|
||||
(define-query (save-origin-status url type)
|
||||
"Return the status of a /save request for URL and TYPE (e.g., \"git\")."
|
||||
(path "/api/1/origin/save" type "url" url)
|
||||
json->save-reply)
|
||||
|
||||
(define-query (query-vault id kind)
|
||||
"Ask the availability of object ID and KIND to the vault, where KIND is
|
||||
'directory or 'revision. Return #f if it could not be found, or a
|
||||
<vault-reply> on success."
|
||||
;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
|
||||
;; There's a single format supported for directories and revisions and for
|
||||
;; now, the "/format" bit of the URL *must* be omitted.
|
||||
(path "/api/1/vault" (symbol->string kind) id)
|
||||
json->vault-reply)
|
||||
|
||||
(define (request-cooking id kind)
|
||||
"Request the cooking of object ID and KIND (one of 'directory or 'revision)
|
||||
to the vault. Return a <vault-reply>."
|
||||
(call (swh-url "/api/1/vault" (symbol->string kind) id)
|
||||
json->vault-reply
|
||||
http-post))
|
||||
|
||||
(define* (vault-fetch id kind
|
||||
#:key (log-port (current-error-port)))
|
||||
"Return an input port from which a bundle of the object with the given ID
|
||||
and KIND (one of 'directory or 'revision) can be retrieved, or #f if the
|
||||
object could not be found.
|
||||
|
||||
For a directory, the returned stream is a gzip-compressed tarball. For a
|
||||
revision, it is a gzip-compressed stream for 'git fast-import'."
|
||||
(let loop ((reply (query-vault id kind)))
|
||||
(match reply
|
||||
(#f
|
||||
(and=> (request-cooking id kind) loop))
|
||||
(_
|
||||
(match (vault-reply-status reply)
|
||||
('done
|
||||
;; Fetch the bundle.
|
||||
(let-values (((response port)
|
||||
(http-get (swh-url (vault-reply-fetch-url reply))
|
||||
#:streaming? #t)))
|
||||
(if (= (response-code response) 200)
|
||||
port
|
||||
(begin ;shouldn't happen
|
||||
(close-port port)
|
||||
#f))))
|
||||
('failed
|
||||
;; Upon failure, we're supposed to try again.
|
||||
(format log-port "SWH vault: failure: ~a~%"
|
||||
(vault-reply-progress-message reply))
|
||||
(format log-port "SWH vault: retrying...~%")
|
||||
(loop (request-cooking id kind)))
|
||||
((and (or 'new 'pending) status)
|
||||
;; Wait until the bundle shows up.
|
||||
(let ((message (vault-reply-progress-message reply)))
|
||||
(when (eq? 'new status)
|
||||
(format log-port "SWH vault: \
|
||||
requested bundle cooking, waiting for completion...~%"))
|
||||
(when (string? message)
|
||||
(format log-port "SWH vault: ~a~%" message))
|
||||
|
||||
;; Wait long enough so we don't exhaust our maximum number of
|
||||
;; requests per hour too fast (as of this writing, the limit is 60
|
||||
;; requests per hour per IP address.)
|
||||
(sleep (if (eq? status 'new) 60 30))
|
||||
|
||||
(loop (query-vault id kind)))))))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; High-level interface.
|
||||
;;;
|
||||
|
||||
(define (commit-id? reference)
|
||||
"Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
|
||||
it is a tag name."
|
||||
(and (= (string-length reference) 40)
|
||||
(string-every char-set:hex-digit reference)))
|
||||
|
||||
(define (call-with-temporary-directory proc) ;FIXME: factorize
|
||||
"Call PROC with a name of a temporary directory; close the directory and
|
||||
delete it when leaving the dynamic extent of this call."
|
||||
(let* ((directory (or (getenv "TMPDIR") "/tmp"))
|
||||
(template (string-append directory "/guix-directory.XXXXXX"))
|
||||
(tmp-dir (mkdtemp! template)))
|
||||
(dynamic-wind
|
||||
(const #t)
|
||||
(lambda ()
|
||||
(proc tmp-dir))
|
||||
(lambda ()
|
||||
(false-if-exception (delete-file-recursively tmp-dir))))))
|
||||
|
||||
(define (swh-download url reference output)
|
||||
"Download from Software Heritage a checkout of the Git tag or commit
|
||||
REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success
|
||||
and #f on failure.
|
||||
|
||||
This procedure uses the \"vault\", which contains \"cooked\" directories in
|
||||
the form of tarballs. If the requested directory is not cooked yet, it will
|
||||
wait until it becomes available, which could take several minutes."
|
||||
(match (if (commit-id? reference)
|
||||
(lookup-revision reference)
|
||||
(lookup-origin-revision url reference))
|
||||
((? revision? revision)
|
||||
(call-with-temporary-directory
|
||||
(lambda (directory)
|
||||
(let ((input (vault-fetch (revision-directory revision) 'directory))
|
||||
(tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
|
||||
(dump-port input tar)
|
||||
(close-port input)
|
||||
(let ((status (close-pipe tar)))
|
||||
(unless (zero? status)
|
||||
(error "tar extraction failure" status)))
|
||||
|
||||
(match (scandir directory)
|
||||
(("." ".." sub-directory)
|
||||
(copy-recursively (string-append directory "/" sub-directory)
|
||||
output
|
||||
#:log (%make-void-port "w"))
|
||||
#t))))))
|
||||
(#f
|
||||
#f)))
|
Loading…
Reference in New Issue