;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org> ;;; Copyright © 2018, 2019 Ludovic Courtès <ludo@gnu.org> ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. ;;; This houses stuff we do to files when they arrive at the store - resetting ;;; timestamps, deduplicating, etc. (define-module (guix store deduplication) #:use-module (gcrypt hash) #:use-module (guix build utils) #:use-module (guix base16) #:use-module (srfi srfi-11) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) #:use-module (guix serialization) #:export (nar-sha256 deduplicate)) ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where ;; 'port-position' throws to 'out-of-range' when the offset is great than or ;; equal to 2^32: <https://bugs.gnu.org/32161>. (define (counting-wrapper-port output-port) "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to retrieve the number of bytes written to OUTPUT-PORT." (let ((byte-count 0)) (values (make-custom-binary-output-port "counting-wrapper" (lambda (bytes offset count) (put-bytevector output-port bytes offset count) (set! byte-count (+ byte-count count)) count) (lambda () byte-count) #f (lambda () (close-port output-port))) (lambda () byte-count)))) (define (nar-sha256 file) "Gives the sha256 hash of a file and the size of the file in nar form." (let*-values (((port get-hash) (open-sha256-port)) ((wrapper get-size) (counting-wrapper-port port))) (write-file file wrapper) (force-output wrapper) (force-output port) (let ((hash (get-hash)) (size (get-size))) (close-port wrapper) (values hash size)))) (define (tempname-in directory) "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be unused by the time you create anything with that name, but a good shot." (let ((const-part (string-append directory "/.tmp-link-" (number->string (getpid))))) (let try ((guess-part (number->string (random most-positive-fixnum) 16))) (if (file-exists? (string-append const-part "-" guess-part)) (try (number->string (random most-positive-fixnum) 16)) (string-append const-part "-" guess-part))))) (define* (get-temp-link target #:optional (link-prefix (dirname target))) "Like mkstemp!, but instead of creating a new file and giving you the name, it creates a new hardlink to TARGET and gives you the name. Since cross-file-system hardlinks don't work, the temp link must be created on the same file system - where in that file system it is can be controlled by LINK-PREFIX." (let try ((tempname (tempname-in link-prefix))) (catch 'system-error (lambda () (link target tempname) tempname) (lambda args (if (= (system-error-errno args) EEXIST) (try (tempname-in link-prefix)) (apply throw args)))))) ;; There are 3 main kinds of errors we can get from hardlinking: "Too many ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and ;; "can't fit more stuff in this directory" (ENOSPC). (define* (replace-with-link target to-replace #:key (swap-directory (dirname target))) "Atomically replace the file TO-REPLACE with a link to TARGET. Use SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC and EMLINK, TO-REPLACE is left unchanged. Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system." (define temp-link (catch 'system-error (lambda () (get-temp-link target swap-directory)) (lambda args ;; We get ENOSPC when we can't fit an additional entry in ;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its ;; maximum number of links. (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK)) #f (apply throw args))))) ;; If we couldn't create TEMP-LINK, that's OK: just don't do the ;; replacement, which means TO-REPLACE won't be deduplicated. (when temp-link (let* ((parent (dirname to-replace)) (stat (stat parent))) (make-file-writable parent) (catch 'system-error (lambda () (rename-file temp-link to-replace)) (lambda args (delete-file temp-link) (unless (= EMLINK (system-error-errno args)) (apply throw args)))) ;; Restore PARENT's mtime and permissions. (set-file-time parent stat) (chmod parent (stat:mode stat))))) (define* (deduplicate path hash #:key (store %store-directory)) "Check if a store item with sha256 hash HASH already exists. If so, replace PATH with a hardlink to the already-existing one. If not, register PATH so that future duplicates can hardlink to it. PATH is assumed to be under STORE." (let* ((links-directory (string-append store "/.links")) (link-file (string-append links-directory "/" (bytevector->base16-string hash)))) (mkdir-p links-directory) (if (eq? 'directory (stat:type (lstat path))) ;; Can't hardlink directories, so hardlink their atoms. (for-each (lambda (file) (unless (or (member file '("." "..")) (and (string=? path store) (string=? file ".links"))) (let ((file (string-append path "/" file))) (deduplicate file (nar-sha256 file) #:store store)))) (scandir path)) (if (file-exists? link-file) (replace-with-link link-file path #:swap-directory links-directory) (catch 'system-error (lambda () (link path link-file)) (lambda args (let ((errno (system-error-errno args))) (cond ((= errno EEXIST) ;; Someone else put an entry for PATH in ;; LINKS-DIRECTORY before we could. Let's use it. (replace-with-link path link-file #:swap-directory links-directory)) ((= errno ENOSPC) ;; There's not enough room in the directory index for ;; more entries in .links, but that's fine: we can ;; just stop. #f) ((= errno EMLINK) ;; PATH has reached the maximum number of links, but ;; that's OK: we just can't deduplicate it more. #f) (else (apply throw args))))))))))