From 55549c7b9b778a79d3e1f3d085861ef36aabdca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Fri, 30 Aug 2019 00:54:15 +0200 Subject: [PATCH] lint: Add 'archival' checker. * guix/lint.scm (check-archival): New procedure. (%network-dependent-checkers): Add 'archival' checker. * tests/lint.scm ("archival: missing content") ("archival: content available") ("archival: missing revision") ("archival: revision available") ("archival: rate limit reached"): New tests. * doc/guix.texi (Invoking guix lint): Document it. --- doc/guix.texi | 25 +++++++++++++ guix/lint.scm | 96 +++++++++++++++++++++++++++++++++++++++++++++++++- tests/lint.scm | 81 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 1 deletion(-) diff --git a/doc/guix.texi b/doc/guix.texi index 0510f57c23..de02ad8687 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -9249,6 +9249,31 @@ Parse the @code{source} URL to determine if a tarball from GitHub is autogenerated or if it is a release tarball. Unfortunately GitHub's autogenerated tarballs are sometimes regenerated. +@item archival +@cindex Software Heritage, source code archive +@cindex archival of source code, Software Heritage +Checks whether the package's source code is archived at +@uref{https://www.softwareheritage.org, Software Heritage}. + +When the source code that is not archived comes from a version-control system +(VCS)---e.g., it's obtained with @code{git-fetch}, send Software Heritage a +``save'' request so that it eventually archives it. This ensures that the +source will remain available in the long term, and that Guix can fall back to +Software Heritage should the source code disappear from its original host. +The status of recent ``save'' requests can be +@uref{https://archive.softwareheritage.org/save/#requests, viewed on-line}. + +When source code is a tarball obtained with @code{url-fetch}, simply print a +message when it is not archived. As of this writing, Software Heritage does +not allow requests to save arbitrary tarballs; we are working on ways to +ensure that non-VCS source code is also archived. + +Software Heritage +@uref{https://archive.softwareheritage.org/api/#rate-limiting, limits the +request rate per IP address}. When the limit is reached, @command{guix lint} +prints a message and the @code{archival} checker stops doing anything until +that limit has been reset. + @item cve @cindex security vulnerabilities @cindex CVE, Common Vulnerabilities and Exposures diff --git a/guix/lint.scm b/guix/lint.scm index 254f4e2830..ba38bef806 100644 --- a/guix/lint.scm +++ b/guix/lint.scm @@ -44,6 +44,8 @@ #:use-module ((guix ui) #:select (texi->plain-text fill-paragraph)) #:use-module (guix gnu-maintenance) #:use-module (guix cve) + #:use-module ((guix swh) #:hide (origin?)) + #:autoload (guix git-download) (git-reference?) #:use-module (ice-9 match) #:use-module (ice-9 regex) #:use-module (ice-9 format) @@ -80,6 +82,7 @@ check-vulnerabilities check-for-updates check-formatting + check-archival lint-warning lint-warning? @@ -1033,6 +1036,93 @@ the NIST server non-fatal." '())) (#f '()))) ; cannot find newer upstream release + +(define (check-archival package) + "Check whether PACKAGE's source code is archived on Software Heritage. If +it's not, and if its source code is a VCS snapshot, then send a \"save\" +request to Software Heritage. + +Software Heritage imposes limits on the request rate per client IP address. +This checker prints a notice and stops doing anything once that limit has been +reached." + (define (response->warning url method response) + (if (request-rate-limit-reached? url method) + (list (make-warning package + (G_ "Software Heritage rate limit reached; \ +try again later") + #:field 'source)) + (list (make-warning package + (G_ "'~a' returned ~a") + (list url (response-code response)) + #:field 'source)))) + + (define skip-key (gensym "skip-archival-check")) + + (define (skip-when-limit-reached url method) + (or (not (request-rate-limit-reached? url method)) + (throw skip-key #t))) + + (parameterize ((%allow-request? skip-when-limit-reached)) + (catch #t + (lambda () + (match (and (origin? (package-source package)) + (package-source package)) + (#f ;no source + '()) + ((= origin-uri (? git-reference? reference)) + (define url + (git-reference-url reference)) + (define commit + (git-reference-commit reference)) + + (match (if (commit-id? commit) + (or (lookup-revision commit) + (lookup-origin-revision url commit)) + (lookup-origin-revision url commit)) + ((? revision? revision) + '()) + (#f + ;; Revision is missing from the archive, attempt to save it. + (catch 'swh-error + (lambda () + (save-origin (git-reference-url reference) "git") + (list (make-warning + package + ;; TRANSLATORS: "Software Heritage" is a proper noun + ;; that must remain untranslated. See + ;; . + (G_ "scheduled Software Heritage archival") + #:field 'source))) + (lambda (key url method response . _) + (cond ((= 429 (response-code response)) + (list (make-warning + package + (G_ "archival rate limit exceeded; \ +try again later") + #:field 'source))) + (else + (response->warning url method response)))))))) + ((? origin? origin) + ;; Since "save" origins are not supported for non-VCS source, all + ;; we can do is tell whether a given tarball is available or not. + (if (origin-sha256 origin) ;XXX: for ungoogled-chromium + (match (lookup-content (origin-sha256 origin) "sha256") + (#f + (list (make-warning package + (G_ "source not archived on Software \ +Heritage") + #:field 'source))) + ((? content?) + '())) + '())))) + (match-lambda* + ((key url method response) + (response->warning url method response)) + ((key . args) + (if (eq? key skip-key) + '() + (apply throw key args))))))) + ;;; ;;; Source code formatting. @@ -1237,7 +1327,11 @@ or a list thereof") (lint-checker (name 'refresh) (description "Check the package for new upstream releases") - (check check-for-updates)))) + (check check-for-updates)) + (lint-checker + (name 'archival) + (description "Ensure source code archival on Software Heritage") + (check check-archival)))) (define %all-checkers (append %local-checkers diff --git a/tests/lint.scm b/tests/lint.scm index c8b88136f4..1b92f02b85 100644 --- a/tests/lint.scm +++ b/tests/lint.scm @@ -35,6 +35,7 @@ #:use-module (guix packages) #:use-module (guix lint) #:use-module (guix ui) + #:use-module (guix swh) #:use-module (gnu packages) #:use-module (gnu packages glib) #:use-module (gnu packages pkg-config) @@ -47,6 +48,7 @@ #:use-module (ice-9 regex) #:use-module (ice-9 getopt-long) #:use-module (ice-9 pretty-print) + #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-26) @@ -859,6 +861,85 @@ '() (check-formatting (dummy-package "x"))) +(test-assert "archival: missing content" + (let* ((origin (origin + (method url-fetch) + (uri "http://example.org/foo.tgz") + (sha256 (make-bytevector 32)))) + (warnings (with-http-server '((404 "Not archived.")) + (parameterize ((%swh-base-url (%local-url))) + (check-archival (dummy-package "x" + (source origin))))))) + (warning-contains? "not archived" warnings))) + +(test-equal "archival: content available" + '() + (let* ((origin (origin + (method url-fetch) + (uri "http://example.org/foo.tgz") + (sha256 (make-bytevector 32)))) + ;; https://archive.softwareheritage.org/api/1/content/ + (content "{ \"checksums\": {}, \"data_url\": \"xyz\", + \"length\": 42 }")) + (with-http-server `((200 ,content)) + (parameterize ((%swh-base-url (%local-url))) + (check-archival (dummy-package "x" (source origin))))))) + +(test-assert "archival: missing revision" + (let* ((origin (origin + (method git-fetch) + (uri (git-reference + (url "http://example.org/foo.git") + (commit "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))) + (sha256 (make-bytevector 32)))) + ;; https://archive.softwareheritage.org/api/1/origin/save/ + (save "{ \"origin_url\": \"http://example.org/foo.git\", + \"save_request_date\": \"2014-11-17T22:09:38+01:00\", + \"save_request_status\": \"accepted\", + \"save_task_status\": \"scheduled\" }") + (warnings (with-http-server `((404 "No revision.") ;lookup-revision + (404 "No origin.") ;lookup-origin + (200 ,save)) ;save-origin + (parameterize ((%swh-base-url (%local-url))) + (check-archival (dummy-package "x" (source origin))))))) + (warning-contains? "scheduled" warnings))) + +(test-equal "archival: revision available" + '() + (let* ((origin (origin + (method git-fetch) + (uri (git-reference + (url "http://example.org/foo.git") + (commit "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))) + (sha256 (make-bytevector 32)))) + ;; https://archive.softwareheritage.org/api/1/revision/ + (revision "{ \"author\": {}, \"parents\": [], + \"date\": \"2014-11-17T22:09:38+01:00\" }")) + (with-http-server `((200 ,revision)) + (parameterize ((%swh-base-url (%local-url))) + (check-archival (dummy-package "x" (source origin))))))) + +(test-assert "archival: rate limit reached" + ;; We should get a single warning stating that the rate limit was reached, + ;; and nothing more, in particular no other HTTP requests. + (let* ((origin (origin + (method url-fetch) + (uri "http://example.org/foo.tgz") + (sha256 (make-bytevector 32)))) + (too-many (build-response + #:code 429 + #:reason-phrase "Too many requests" + #:headers '((x-ratelimit-remaining . "0") + (x-ratelimit-reset . "3000000000")))) + (warnings (with-http-server `((,too-many "Rate limit reached.")) + (parameterize ((%swh-base-url (%local-url))) + (append-map (lambda (name) + (check-archival + (dummy-package name (source origin)))) + '("x" "y" "z")))))) + (string-contains (single-lint-warning-message warnings) + "rate limit reached"))) + (test-end "lint") ;; Local Variables: