union: Don't traverse sub-directories only found in one element of the union.

This significantly reduces I/O when building profiles, especially with
lots of package-specific sub-directories (such as 'share/emacs/24.3',
'texmf', etc.)

* guix/build/union.scm (union-build)[file-tree](others-have-it?): New
  procedure.  Use it in the 'enter?' parameter of 'file-system-fold';
  change 'skip' parameter accordingly.
* tests/union.scm ("union-build"): Ensure that 'include' is a symlink
  and 'bin' is a directory.
This commit is contained in:
Ludovic Courtès 2013-09-02 23:03:03 +02:00
parent 165fd9d5e6
commit 43dd92024a
2 changed files with 36 additions and 5 deletions

View File

@ -105,7 +105,22 @@ single leaf."
the DIRECTORIES." the DIRECTORIES."
(define (file-tree dir) (define (file-tree dir)
;; Return the contents of DIR as a tree. ;; Return the contents of DIR as a tree.
(match (file-system-fold (const #t)
(define (others-have-it? subdir)
;; Return #t if other elements of DIRECTORIES have SUBDIR.
(let ((subdir (substring subdir (string-length dir))))
(any (lambda (other)
(and (not (string=? other dir))
(file-exists? (string-append other "/" subdir))))
directories)))
(match (file-system-fold (lambda (subdir stat result) ; enter?
;; No need to traverse DIR since there's
;; nothing to union it with. Thus, we avoid
;; creating a gazillon symlinks (think
;; share/emacs/24.3, share/texmf, etc.)
(or (string=? subdir dir)
(others-have-it? subdir)))
(lambda (file stat result) ; leaf (lambda (file stat result) ; leaf
(match result (match result
(((siblings ...) rest ...) (((siblings ...) rest ...)
@ -117,7 +132,12 @@ the DIRECTORIES."
(((leaves ...) (siblings ...) rest ...) (((leaves ...) (siblings ...) rest ...)
`(((,(basename dir) ,@leaves) ,@siblings) `(((,(basename dir) ,@leaves) ,@siblings)
,@rest)))) ,@rest))))
(const #f) ; skip (lambda (dir stat result) ; skip
;; DIR is not available elsewhere, so treat it
;; as a leaf.
(match result
(((siblings ...) rest ...)
`((,dir ,@siblings) ,@rest))))
(lambda (file stat errno result) (lambda (file stat errno result)
(format (current-error-port) "union-build: ~a: ~a~%" (format (current-error-port) "union-build: ~a: ~a~%"
file (strerror errno))) file (strerror errno)))
@ -158,8 +178,9 @@ the DIRECTORIES."
(mkdir output) (mkdir output)
(let loop ((tree (delete-duplicate-leaves (let loop ((tree (delete-duplicate-leaves
(cons "." (cons "."
(tree-union (append-map (compose tree-leaves file-tree) (tree-union
directories))) (append-map (compose tree-leaves file-tree)
(delete-duplicates directories))))
leaf=? leaf=?
resolve-collision)) resolve-collision))
(dir '())) (dir '()))

View File

@ -114,7 +114,17 @@
(file-exists? "bin/ld") (file-exists? "bin/ld")
(file-exists? "lib/libc.so") (file-exists? "lib/libc.so")
(directory-exists? "lib/gcc") (directory-exists? "lib/gcc")
(file-exists? "include/unistd.h")))))) (file-exists? "include/unistd.h")
;; The 'include' sub-directory is only found in
;; glibc-bootstrap, so it should be unified in a
;; straightforward way, without traversing it.
(eq? 'symlink (stat:type (lstat "include")))
;; Conversely, several inputs have a 'bin' sub-directory, so
;; unifying it requires traversing them all, and creating a
;; new 'bin' sub-directory in the profile.
(eq? 'directory (stat:type (lstat "bin"))))))))
(test-end) (test-end)