Add (guix store deduplication).
* guix/store/database.scm (register-path): Add #:deduplicate? and call 'deduplicate' when it's true. (counting-wrapper-port, nar-sha256): Move to... * guix/store/deduplication.scm: ... here. New file. * tests/store-deduplication.scm: New file. * Makefile.am (STORE_MODULES): Add deduplication.scm. (SCM_TESTS) [HAVE_GUILE_SQLITE3]: Add store-deduplication.scm. Co-authored-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
parent
285cc75c31
commit
bf5bf5778c
|
@ -259,7 +259,8 @@ endif BUILD_DAEMON_OFFLOAD
|
||||||
|
|
||||||
# Scheme implementation of the build daemon and related functionality.
|
# Scheme implementation of the build daemon and related functionality.
|
||||||
STORE_MODULES = \
|
STORE_MODULES = \
|
||||||
guix/store/database.scm
|
guix/store/database.scm \
|
||||||
|
guix/store/deduplication.scm
|
||||||
|
|
||||||
if HAVE_GUILE_SQLITE3
|
if HAVE_GUILE_SQLITE3
|
||||||
MODULES += $(STORE_MODULES)
|
MODULES += $(STORE_MODULES)
|
||||||
|
@ -392,7 +393,8 @@ endif
|
||||||
if HAVE_GUILE_SQLITE3
|
if HAVE_GUILE_SQLITE3
|
||||||
|
|
||||||
SCM_TESTS += \
|
SCM_TESTS += \
|
||||||
tests/store-database.scm
|
tests/store-database.scm \
|
||||||
|
tests/store-deduplication.scm
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -21,10 +21,9 @@
|
||||||
#:use-module (sqlite3)
|
#:use-module (sqlite3)
|
||||||
#:use-module (guix config)
|
#:use-module (guix config)
|
||||||
#:use-module (guix serialization)
|
#:use-module (guix serialization)
|
||||||
|
#:use-module (guix store deduplication)
|
||||||
#:use-module (guix base16)
|
#:use-module (guix base16)
|
||||||
#:use-module (guix hash)
|
|
||||||
#:use-module (guix build syscalls)
|
#:use-module (guix build syscalls)
|
||||||
#:use-module (rnrs io ports)
|
|
||||||
#:use-module (srfi srfi-11)
|
#:use-module (srfi srfi-11)
|
||||||
#:use-module (srfi srfi-19)
|
#:use-module (srfi srfi-19)
|
||||||
#:use-module (ice-9 match)
|
#:use-module (ice-9 match)
|
||||||
|
@ -140,39 +139,6 @@ bytes of the store item denoted by PATH after being converted to nar form."
|
||||||
;;; High-level interface.
|
;;; High-level interface.
|
||||||
;;;
|
;;;
|
||||||
|
|
||||||
;; XXX: Would it be better to just make WRITE-FILE give size as well? I question
|
|
||||||
;; the general utility of this approach.
|
|
||||||
(define (counting-wrapper-port output-port)
|
|
||||||
"Some custom ports don't implement GET-POSITION at all. But if we want to
|
|
||||||
figure out how many bytes are being written, we will want to use that. So this
|
|
||||||
makes a wrapper around a port which implements GET-POSITION."
|
|
||||||
(let ((byte-count 0))
|
|
||||||
(make-custom-binary-output-port "counting-wrapper"
|
|
||||||
(lambda (bytes offset count)
|
|
||||||
(set! byte-count
|
|
||||||
(+ byte-count count))
|
|
||||||
(put-bytevector output-port bytes
|
|
||||||
offset count)
|
|
||||||
count)
|
|
||||||
(lambda ()
|
|
||||||
byte-count)
|
|
||||||
#f
|
|
||||||
(lambda ()
|
|
||||||
(close-port output-port)))))
|
|
||||||
|
|
||||||
|
|
||||||
(define (nar-sha256 file)
|
|
||||||
"Gives the sha256 hash of a file and the size of the file in nar form."
|
|
||||||
(let-values (((port get-hash) (open-sha256-port)))
|
|
||||||
(let ((wrapper (counting-wrapper-port port)))
|
|
||||||
(write-file file wrapper)
|
|
||||||
(force-output wrapper)
|
|
||||||
(force-output port)
|
|
||||||
(let ((hash (get-hash))
|
|
||||||
(size (port-position wrapper)))
|
|
||||||
(close-port wrapper)
|
|
||||||
(values hash size)))))
|
|
||||||
|
|
||||||
;; TODO: Factorize with that in (gnu build install).
|
;; TODO: Factorize with that in (gnu build install).
|
||||||
(define (reset-timestamps file)
|
(define (reset-timestamps file)
|
||||||
"Reset the modification time on FILE and on all the files it contains, if
|
"Reset the modification time on FILE and on all the files it contains, if
|
||||||
|
@ -211,7 +177,7 @@ it's a directory."
|
||||||
|
|
||||||
(define* (register-path path
|
(define* (register-path path
|
||||||
#:key (references '()) deriver prefix
|
#:key (references '()) deriver prefix
|
||||||
state-directory)
|
state-directory (deduplicate? #t))
|
||||||
;; Priority for options: first what is given, then environment variables,
|
;; Priority for options: first what is given, then environment variables,
|
||||||
;; then defaults. %state-directory, %store-directory, and
|
;; then defaults. %state-directory, %store-directory, and
|
||||||
;; %store-database-directory already handle the "environment variables /
|
;; %store-database-directory already handle the "environment variables /
|
||||||
|
@ -262,4 +228,7 @@ be used internally by the daemon's build hook."
|
||||||
#:deriver deriver
|
#:deriver deriver
|
||||||
#:hash (string-append "sha256:"
|
#:hash (string-append "sha256:"
|
||||||
(bytevector->base16-string hash))
|
(bytevector->base16-string hash))
|
||||||
#:nar-size nar-size))))
|
#:nar-size nar-size)
|
||||||
|
|
||||||
|
(when deduplicate?
|
||||||
|
(deduplicate real-path hash #:store store-dir)))))
|
||||||
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
;;; GNU Guix --- Functional package management for GNU
|
||||||
|
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
|
||||||
|
;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
|
||||||
|
;;;
|
||||||
|
;;; This file is part of GNU Guix.
|
||||||
|
;;;
|
||||||
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
||||||
|
;;; under the terms of the GNU General Public License as published by
|
||||||
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||||
|
;;; your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
||||||
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;;; GNU General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU General Public License
|
||||||
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
;;; This houses stuff we do to files when they arrive at the store - resetting
|
||||||
|
;;; timestamps, deduplicating, etc.
|
||||||
|
|
||||||
|
(define-module (guix store deduplication)
|
||||||
|
#:use-module (guix hash)
|
||||||
|
#:use-module (guix build utils)
|
||||||
|
#:use-module (guix base16)
|
||||||
|
#:use-module (srfi srfi-11)
|
||||||
|
#:use-module (rnrs io ports)
|
||||||
|
#:use-module (ice-9 ftw)
|
||||||
|
#:use-module (guix serialization)
|
||||||
|
#:export (nar-sha256
|
||||||
|
deduplicate))
|
||||||
|
|
||||||
|
;; Would it be better to just make WRITE-FILE give size as well? I question
|
||||||
|
;; the general utility of this approach.
|
||||||
|
(define (counting-wrapper-port output-port)
  "Return a binary output port that forwards every byte written to it to
OUTPUT-PORT while keeping a running total of the bytes written.  The total is
what the returned port's GET-POSITION procedure reports, which helps when
OUTPUT-PORT is a custom port that does not implement GET-POSITION itself.
Closing the returned port also closes OUTPUT-PORT."
  (define written 0)

  (define (write! bv start len)
    ;; Account for the bytes, forward them, and report all of them as
    ;; consumed.
    (set! written (+ written len))
    (put-bytevector output-port bv start len)
    len)

  (define (get-position)
    written)

  (define (close)
    (close-port output-port))

  ;; No SET-POSITION! procedure: the wrapper is write-only and sequential.
  (make-custom-binary-output-port "counting-wrapper"
                                  write! get-position #f close))
|
||||||
|
|
||||||
|
(define (nar-sha256 file)
  "Serialize FILE with 'write-file' into a sha256 port and return two values:
the resulting hash and the number of bytes that were written."
  (call-with-values open-sha256-port
    (lambda (hash-port get-hash)
      ;; The counting wrapper records how many bytes 'write-file' emits on
      ;; their way to HASH-PORT.
      (define counter (counting-wrapper-port hash-port))

      (write-file file counter)
      ;; Flush both layers before reading the hash and the byte count.
      (force-output counter)
      (force-output hash-port)
      (let* ((digest   (get-hash))
             (nar-size (port-position counter)))
        ;; Closing the wrapper closes HASH-PORT too.
        (close-port counter)
        (values digest nar-size)))))
|
||||||
|
|
||||||
|
(define (tempname-in directory)
  "Return a file name under DIRECTORY, of the form
DIRECTORY/.tmp-link-PID-RANDOM, that did not exist when it was checked.
Nothing is created, so the name may be taken by the time the caller uses it;
callers must be prepared to handle that."
  (define prefix
    (string-append directory "/.tmp-link-"
                   (number->string (getpid)) "-"))

  (define (candidate)
    ;; A fresh random hexadecimal suffix on every call.
    (string-append prefix
                   (number->string (random most-positive-fixnum) 16)))

  (let loop ((name (candidate)))
    (if (file-exists? name)
        (loop (candidate))
        name)))
|
||||||
|
|
||||||
|
(define* (get-temp-link target #:optional (link-prefix (dirname target)))
  "Like mkstemp!, but instead of creating a new file and returning its name,
create a new hardlink to TARGET and return the name of that link.  Since
cross-filesystem hardlinks don't work, the temporary link must be created on
the same file system as TARGET; LINK-PREFIX is the directory in which it is
created and defaults to the directory containing TARGET.  Any 'system-error'
other than EEXIST raised by 'link' is propagated to the caller."
  (let try ((tempname (tempname-in link-prefix)))
    (catch 'system-error
      (lambda ()
        (link target tempname)
        tempname)
      ;; A 'catch' handler is called with the key followed by the throw
      ;; arguments, so it must accept a rest list; that list is also what
      ;; 'system-error-errno' expects.
      (lambda args
        (if (= (system-error-errno args) EEXIST)
            ;; Someone created TEMPNAME between the existence check and the
            ;; 'link' call: pick another name and retry.
            (try (tempname-in link-prefix))
            (apply throw args))))))
|
||||||
|
|
||||||
|
;; There are 3 main kinds of errors we can get from hardlinking: "Too many
|
||||||
|
;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
|
||||||
|
;; "can't fit more stuff in this directory" (ENOSPC).
|
||||||
|
|
||||||
|
(define (replace-with-link target to-replace)
  "Atomically make TO-REPLACE a hardlink to TARGET: a temporary link to TARGET
is created in the directory of TO-REPLACE and then renamed over TO-REPLACE.
TARGET and TO-REPLACE must be on the same file system."
  (rename-file (get-temp-link target (dirname to-replace))
               to-replace))
|
||||||
|
|
||||||
|
(define-syntax-rule (false-if-system-error (errors ...) exp ...)
  "Evaluate EXP... and return the value of the last expression.  If a
'system-error' is thrown whose error code is one of ERRORS, return #f instead;
any other 'system-error' is re-thrown unchanged."
  (catch 'system-error
    (lambda ()
      exp ...)
    (lambda args
      (let ((ignored (list errors ...)))
        (if (not (member (system-error-errno args) ignored))
            (apply throw args)
            #f)))))
|
||||||
|
|
||||||
|
;; NOTE(review): (guix build utils) appears to export '%store-directory' as a
;; procedure rather than a string, hence the call in the default value below;
;; confirm against that module.
(define* (deduplicate path hash #:key (store (%store-directory)))
  "Check whether a store item with sha256 hash HASH already exists.  If so,
replace PATH with a hardlink to the already-existing one.  If not, register
PATH under STORE/.links so that future duplicates can hardlink to it.  When
PATH is a directory, recursively deduplicate each of its entries instead,
using the hash returned by 'nar-sha256' for that entry.  PATH is assumed to be
under STORE.

EMLINK errors while replacing PATH, and ENOSPC errors while registering it,
are ignored and leave PATH as it is; other system errors are propagated."
  (let* ((links-directory (string-append store "/.links"))
         (link-file       (string-append links-directory "/"
                                         (bytevector->base16-string hash))))
    (mkdir-p links-directory)
    (if (file-is-directory? path)
        ;; Can't hardlink directories, so hardlink their atoms.  'scandir'
        ;; returns bare entry names, so each one must be qualified with PATH
        ;; before it is hashed and deduplicated.
        (for-each (lambda (entry)
                    (let ((file (string-append path "/" entry)))
                      ;; 'nar-sha256' returns the hash and the nar size;
                      ;; only the hash is needed here.
                      (let-values (((entry-hash size) (nar-sha256 file)))
                        (deduplicate file entry-hash #:store store))))
                  (scandir path
                           (lambda (entry)
                             (not (member entry '("." ".."))))))
        (if (file-exists? link-file)
            (false-if-system-error (EMLINK)
              (replace-with-link link-file path))
            (catch 'system-error
              (lambda ()
                (link path link-file))
              (lambda args
                (let ((errno (system-error-errno args)))
                  (cond ((= errno EEXIST)
                         ;; Someone else put an entry for PATH in
                         ;; LINKS-DIRECTORY before we could.  Let's use it:
                         ;; PATH becomes a link to LINK-FILE, never the
                         ;; other way around.
                         (false-if-system-error (EMLINK)
                           (replace-with-link link-file path)))
                        ((= errno ENOSPC)
                         ;; There's not enough room in the directory index
                         ;; for more entries in .links, but that's fine: we
                         ;; can just stop.
                         #f)
                        (else (apply throw args))))))))))
|
|
@ -0,0 +1,64 @@
|
||||||
|
;;; GNU Guix --- Functional package management for GNU
|
||||||
|
;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
|
||||||
|
;;;
|
||||||
|
;;; This file is part of GNU Guix.
|
||||||
|
;;;
|
||||||
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
||||||
|
;;; under the terms of the GNU General Public License as published by
|
||||||
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||||
|
;;; your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
||||||
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;;; GNU General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU General Public License
|
||||||
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
(define-module (test-store-deduplication)
|
||||||
|
#:use-module (guix tests)
|
||||||
|
#:use-module (guix store deduplication)
|
||||||
|
#:use-module (guix hash)
|
||||||
|
#:use-module ((guix utils) #:select (call-with-temporary-directory))
|
||||||
|
#:use-module (guix build utils)
|
||||||
|
#:use-module (rnrs bytevectors)
|
||||||
|
#:use-module (ice-9 binary-ports)
|
||||||
|
#:use-module (srfi srfi-1)
|
||||||
|
#:use-module (srfi srfi-64))
|
||||||
|
|
||||||
|
(test-begin "store-deduplication")
|
||||||
|
|
||||||
|
;; Five files with identical contents are deduplicated under the sha256 of
;; their contents, and one file with different contents under its nar hash.
;; Afterwards the identical files must share one inode with a link count of 6
;; (the five files plus the '.links' entry), and the unique file must have its
;; own inode with a link count of 2 (itself plus its '.links' entry).
(test-equal "deduplicate"
  (cons* #t #f                                    ;inode comparisons
         2 (make-list 5 6))                       ;'nlink' values

  (call-with-temporary-directory
   (lambda (store)
     (define (in-store name)
       (string-append store "/" name))

     (define (write-bytes file bytes)
       (call-with-output-file file
         (lambda (port)
           (put-bytevector port bytes))))

     (define (inode file)
       (stat:ino (stat file)))

     (define (link-count file)
       (stat:nlink (stat file)))

     (let ((data      (string->utf8 "Hello, world!"))
           (identical (map (compose in-store number->string)
                           (iota 5)))
           (unique    (in-store "unique")))
       (for-each (lambda (file)
                   (write-bytes file data))
                 identical)
       (write-bytes unique (string->utf8 "This is unique."))

       (for-each (lambda (file)
                   (deduplicate file (sha256 data) #:store store))
                 identical)
       (deduplicate unique (nar-sha256 unique) #:store store)

       ;; (system (string-append "ls -lRia " store))
       (cons* (apply = (map inode identical))
              (= (inode unique) (inode (car identical)))
              (link-count unique)
              (map link-count identical))))))
|
||||||
|
|
||||||
|
(test-end "store-deduplication")
|
Loading…
Reference in New Issue