guix-devel/guix/store/deduplication.scm

178 lines
7.9 KiB
Scheme
Raw Normal View History

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
;;; Copyright © 2018, 2019 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
;;; This houses stuff we do to files when they arrive at the store - resetting
;;; timestamps, deduplicating, etc.
(define-module (guix store deduplication)
Switch to Guile-Gcrypt. This removes (guix hash) and (guix pk-crypto), which now live as part of Guile-Gcrypt (version 0.1.0.) * guix/gcrypt.scm, guix/hash.scm, guix/pk-crypto.scm, tests/hash.scm, tests/pk-crypto.scm: Remove. * configure.ac: Test for Guile-Gcrypt. Remove LIBGCRYPT and LIBGCRYPT_LIBDIR assignments. * m4/guix.m4 (GUIX_ASSERT_LIBGCRYPT_USABLE): Remove. * README: Add Guile-Gcrypt to the dependencies; move libgcrypt as "required unless --disable-daemon". * doc/guix.texi (Requirements): Likewise. * gnu/packages/bash.scm, guix/derivations.scm, guix/docker.scm, guix/git.scm, guix/http-client.scm, guix/import/cpan.scm, guix/import/cran.scm, guix/import/crate.scm, guix/import/elpa.scm, guix/import/gnu.scm, guix/import/hackage.scm, guix/import/texlive.scm, guix/import/utils.scm, guix/nar.scm, guix/pki.scm, guix/scripts/archive.scm, guix/scripts/authenticate.scm, guix/scripts/download.scm, guix/scripts/hash.scm, guix/scripts/pack.scm, guix/scripts/publish.scm, guix/scripts/refresh.scm, guix/scripts/substitute.scm, guix/store.scm, guix/store/deduplication.scm, guix/tests.scm, tests/base32.scm, tests/builders.scm, tests/challenge.scm, tests/cpan.scm, tests/crate.scm, tests/derivations.scm, tests/gem.scm, tests/nar.scm, tests/opam.scm, tests/pki.scm, tests/publish.scm, tests/pypi.scm, tests/store-deduplication.scm, tests/store.scm, tests/substitute.scm: Adjust imports. * gnu/system/vm.scm: Likewise. (guile-sqlite3&co): Rename to... (gcrypt-sqlite3&co): ... this. Add GUILE-GCRYPT. (expression->derivation-in-linux-vm)[config]: Remove. (iso9660-image)[config]: Remove. (qemu-image)[config]: Remove. (system-docker-image)[config]: Remove. * guix/scripts/pack.scm: Adjust imports. (guile-sqlite3&co): Rename to... (gcrypt-sqlite3&co): ... this. Add GUILE-GCRYPT. (self-contained-tarball)[build]: Call 'make-config.scm' without #:libgcrypt argument. (squashfs-image)[libgcrypt]: Remove. [build]: Call 'make-config.scm' without #:libgcrypt. (docker-image)[config, json]: Remove. [build]: Add GUILE-GCRYPT to the extensions Remove (guix config) from the imported modules. * guix/self.scm (specification->package): Remove "libgcrypt", add "guile-gcrypt". (compiled-guix): Remove #:libgcrypt. [guile-gcrypt]: New variable. [dependencies]: Add it. [*core-modules*]: Remove #:libgcrypt from 'make-config.scm' call. Add #:extensions. [*config*]: Remove #:libgcrypt from 'make-config.scm' call. (%dependency-variables): Remove %libgcrypt. (make-config.scm): Remove #:libgcrypt. * build-aux/build-self.scm (guile-gcrypt): New variable. (make-config.scm): Remove #:libgcrypt. (build-program)[fake-gcrypt-hash]: New variable. Add (gcrypt hash) to the imported modules. Adjust load path assignments. * gnu/packages/package-management.scm (guix)[propagated-inputs]: Add GUILE-GCRYPT. [arguments]: In 'wrap-program' phase, add GUILE-GCRYPT to the search path.
2018-08-31 17:07:07 +02:00
#:use-module (gcrypt hash)
#:use-module (guix build utils)
#:use-module (guix base16)
#:use-module (srfi srfi-11)
#:use-module (rnrs io ports)
#:use-module (ice-9 ftw)
#:use-module (guix serialization)
#:export (nar-sha256
deduplicate))
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is great than or
;; equal to 2^32: <https://bugs.gnu.org/32161>.
(define (counting-wrapper-port output-port)
"Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
retrieve the number of bytes written to OUTPUT-PORT."
(let ((byte-count 0))
(values (make-custom-binary-output-port "counting-wrapper"
(lambda (bytes offset count)
(put-bytevector output-port bytes
offset count)
(set! byte-count
(+ byte-count count))
count)
(lambda ()
byte-count)
#f
(lambda ()
(close-port output-port)))
(lambda ()
byte-count))))
(define (nar-sha256 file)
"Gives the sha256 hash of a file and the size of the file in nar form."
(let*-values (((port get-hash) (open-sha256-port))
((wrapper get-size) (counting-wrapper-port port)))
(write-file file wrapper)
(force-output wrapper)
(force-output port)
(let ((hash (get-hash))
(size (get-size)))
(close-port wrapper)
(values hash size))))
(define (tempname-in directory)
"Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
unused by the time you create anything with that name, but a good shot."
(let ((const-part (string-append directory "/.tmp-link-"
(number->string (getpid)))))
(let try ((guess-part
(number->string (random most-positive-fixnum) 16)))
(if (file-exists? (string-append const-part "-" guess-part))
(try (number->string (random most-positive-fixnum) 16))
(string-append const-part "-" guess-part)))))
(define* (get-temp-link target #:optional (link-prefix (dirname target)))
"Like mkstemp!, but instead of creating a new file and giving you the name,
it creates a new hardlink to TARGET and gives you the name. Since
cross-file-system hardlinks don't work, the temp link must be created on the
same file system - where in that file system it is can be controlled by
LINK-PREFIX."
(let try ((tempname (tempname-in link-prefix)))
(catch 'system-error
(lambda ()
(link target tempname)
tempname)
(lambda args
(if (= (system-error-errno args) EEXIST)
(try (tempname-in link-prefix))
(apply throw args))))))
;; There are 3 main kinds of errors we can get from hardlinking: "Too many
;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
;; "can't fit more stuff in this directory" (ENOSPC).
(define* (replace-with-link target to-replace
#:key (swap-directory (dirname target)))
"Atomically replace the file TO-REPLACE with a link to TARGET. Use
SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
and EMLINK, TO-REPLACE is left unchanged.
Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
(define temp-link
(catch 'system-error
(lambda ()
(get-temp-link target swap-directory))
(lambda args
;; We get ENOSPC when we can't fit an additional entry in
;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its
;; maximum number of links.
(if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
#f
(apply throw args)))))
;; If we couldn't create TEMP-LINK, that's OK: just don't do the
;; replacement, which means TO-REPLACE won't be deduplicated.
(when temp-link
(let* ((parent (dirname to-replace))
(stat (stat parent)))
(make-file-writable parent)
(catch 'system-error
(lambda ()
(rename-file temp-link to-replace))
(lambda args
(delete-file temp-link)
(unless (= EMLINK (system-error-errno args))
(apply throw args))))
;; Restore PARENT's mtime and permissions.
(set-file-time parent stat)
(chmod parent (stat:mode stat)))))
(define* (deduplicate path hash #:key (store %store-directory))
"Check if a store item with sha256 hash HASH already exists. If so,
replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE."
(let* ((links-directory (string-append store "/.links"))
(link-file (string-append links-directory "/"
(bytevector->base16-string hash))))
(mkdir-p links-directory)
(if (eq? 'directory (stat:type (lstat path)))
;; Can't hardlink directories, so hardlink their atoms.
(for-each (lambda (file)
(unless (or (member file '("." ".."))
(and (string=? path store)
(string=? file ".links")))
(let ((file (string-append path "/" file)))
(deduplicate file (nar-sha256 file)
#:store store))))
(scandir path))
(if (file-exists? link-file)
(replace-with-link link-file path
#:swap-directory links-directory)
(catch 'system-error
(lambda ()
(link path link-file))
(lambda args
(let ((errno (system-error-errno args)))
(cond ((= errno EEXIST)
;; Someone else put an entry for PATH in
;; LINKS-DIRECTORY before we could. Let's use it.
(replace-with-link path link-file
#:swap-directory links-directory))
((= errno ENOSPC)
;; There's not enough room in the directory index for
;; more entries in .links, but that's fine: we can
;; just stop.
#f)
((= errno EMLINK)
;; PATH has reached the maximum number of links, but
;; that's OK: we just can't deduplicate it more.
#f)
(else (apply throw args))))))))))