grafts: Make grafting faster.
* guix/build/graft.scm (replace-store-references): Reimplement for faster grafting. Use binary I/O instead of textual I/O. Replace 'mapping' argument (an alist) with 'replacement-table' (a vhash). (rewrite-directory): Adapt to mapping argument change in 'replace-store-references'. Remove 'with-fluids' that previously set '%default-port-encoding' to #f, since we now use binary I/O. (define-inline, hash-length): New macros. (nix-base32-char?): New variable.
This commit is contained in:
parent
ba6d25f3b9
commit
5a1add373a
|
@ -1,5 +1,6 @@
|
||||||
;;; GNU Guix --- Functional package management for GNU
|
;;; GNU Guix --- Functional package management for GNU
|
||||||
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
|
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
|
||||||
|
;;; Copyright © 2016 Mark H Weaver <mhw@netris.org>
|
||||||
;;;
|
;;;
|
||||||
;;; This file is part of GNU Guix.
|
;;; This file is part of GNU Guix.
|
||||||
;;;
|
;;;
|
||||||
|
@ -20,8 +21,12 @@
|
||||||
#:use-module (guix build utils)
|
#:use-module (guix build utils)
|
||||||
#:use-module (rnrs bytevectors)
|
#:use-module (rnrs bytevectors)
|
||||||
#:use-module (rnrs io ports)
|
#:use-module (rnrs io ports)
|
||||||
|
#:use-module (ice-9 vlist)
|
||||||
#:use-module (ice-9 match)
|
#:use-module (ice-9 match)
|
||||||
#:use-module (ice-9 threads)
|
#:use-module (ice-9 threads)
|
||||||
|
#:use-module (ice-9 binary-ports)
|
||||||
|
#:use-module (srfi srfi-1) ; list library
|
||||||
|
#:use-module (srfi srfi-26) ; cut and cute
|
||||||
#:export (replace-store-references
|
#:export (replace-store-references
|
||||||
rewrite-directory))
|
rewrite-directory))
|
||||||
|
|
||||||
|
@ -38,50 +43,134 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Code:
|
;;; Code:
|
||||||
|
|
||||||
(define* (replace-store-references input output mapping
|
(define-syntax-rule (define-inline name val)
|
||||||
|
(define-syntax name (identifier-syntax val)))
|
||||||
|
|
||||||
|
(define-inline hash-length 32)
|
||||||
|
|
||||||
|
(define nix-base32-char?
|
||||||
|
(cute char-set-contains?
|
||||||
|
;; ASCII digits and lower case letters except e o t u
|
||||||
|
(string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
|
||||||
|
<>))
|
||||||
|
|
||||||
|
(define* (replace-store-references input output replacement-table
|
||||||
#:optional (store (%store-directory)))
|
#:optional (store (%store-directory)))
|
||||||
"Read data from INPUT, replacing store references according to MAPPING, and
|
"Read data from INPUT, replacing store references according to
|
||||||
writing the result to OUTPUT."
|
REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
|
||||||
(define pattern
|
vhash that maps strings (original hashes) to bytevectors (replacement hashes).
|
||||||
(let ((nix-base32-chars
|
Note: We use string keys to work around the fact that guile-2.0 hashes all
|
||||||
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
bytevectors to the same value."
|
||||||
#\a #\b #\c #\d #\f #\g #\h #\i #\j #\k #\l #\m #\n
|
|
||||||
#\p #\q #\r #\s #\v #\w #\x #\y #\z)))
|
|
||||||
`(,@(map char-set (string->list store))
|
|
||||||
,(char-set #\/)
|
|
||||||
,@(make-list 32 (list->char-set nix-base32-chars))
|
|
||||||
,(char-set #\-))))
|
|
||||||
|
|
||||||
;; We cannot use `regexp-exec' here because it cannot deal with strings
|
(define (lookup-replacement s)
|
||||||
;; containing NUL characters, hence 'fold-port-matches'.
|
(match (vhash-assoc s replacement-table)
|
||||||
(with-fluids ((%default-port-encoding #f))
|
|
||||||
(when (file-port? input)
|
|
||||||
(setvbuf input _IOFBF 65536))
|
|
||||||
(when (file-port? output)
|
|
||||||
(setvbuf output _IOFBF 65536))
|
|
||||||
|
|
||||||
(let* ((len (+ 34 (string-length store)))
|
|
||||||
(mapping (map (match-lambda
|
|
||||||
((origin . replacement)
|
((origin . replacement)
|
||||||
(unless (string=? (string-drop origin len)
|
replacement)
|
||||||
(string-drop replacement len))
|
(#f #f)))
|
||||||
(error "invalid replacement" origin replacement))
|
|
||||||
(cons (string-take origin len)
|
(define (optimize-u8-predicate pred)
|
||||||
(string-take replacement len))))
|
(cute vector-ref
|
||||||
mapping)))
|
(list->vector (map pred (iota 256)))
|
||||||
(fold-port-matches (lambda (string result)
|
<>))
|
||||||
(match (assoc-ref mapping string)
|
|
||||||
(#f
|
(define nix-base32-byte?
|
||||||
(put-bytevector output (string->utf8 string)))
|
(optimize-u8-predicate
|
||||||
((= string->utf8 replacement)
|
(compose nix-base32-char?
|
||||||
(put-bytevector output replacement)))
|
integer->char)))
|
||||||
#t)
|
|
||||||
#f
|
(define (dash? byte) (= byte 45))
|
||||||
pattern
|
|
||||||
input
|
(define request-size (expt 2 20)) ; 1 MiB
|
||||||
(lambda (char result) ;unmatched
|
|
||||||
(put-u8 output (char->integer char))
|
;; We scan the file for the following 33-byte pattern: 32 bytes of
|
||||||
result)))))
|
;; nix-base32 characters followed by a dash. To accommodate large files,
|
||||||
|
;; we do not read the entire file, but instead work on buffers of up to
|
||||||
|
;; 'request-size' bytes. To ensure that every 33-byte sequence appears
|
||||||
|
;; entirely within exactly one buffer, adjacent buffers must overlap,
|
||||||
|
;; i.e. they must share 32 byte positions. We accomplish this by
|
||||||
|
;; "ungetting" the last 32 bytes of each buffer before reading the next
|
||||||
|
;; buffer, unless we know that we've reached the end-of-file.
|
||||||
|
(let ((buffer (make-bytevector request-size)))
|
||||||
|
(let loop ()
|
||||||
|
;; Note: We avoid 'get-bytevector-n' to work around
|
||||||
|
;; <http://bugs.gnu.org/17466>.
|
||||||
|
(match (get-bytevector-n! input buffer 0 request-size)
|
||||||
|
((? eof-object?) 'done)
|
||||||
|
(end
|
||||||
|
;; We scan the buffer for dashes that might be preceded by a
|
||||||
|
;; nix-base32 hash. The key optimization here is that whenever we
|
||||||
|
;; find a NON-nix-base32 character at position 'i', we know that it
|
||||||
|
;; cannot be part of a hash, so the earliest position where the next
|
||||||
|
;; hash could start is i+1 with the following dash at position i+33.
|
||||||
|
;;
|
||||||
|
;; Since nix-base32 characters comprise only 1/8 of the 256 possible
|
||||||
|
;; byte values, and exclude some of the most common letters in
|
||||||
|
;; English text (e t o u), in practice we can advance by 33 positions
|
||||||
|
;; most of the time.
|
||||||
|
(let scan-from ((i hash-length) (written 0))
|
||||||
|
;; 'i' is the first position where we look for a dash. 'written'
|
||||||
|
;; is the number of bytes in the buffer that have already been
|
||||||
|
;; written.
|
||||||
|
(if (< i end)
|
||||||
|
(let ((byte (bytevector-u8-ref buffer i)))
|
||||||
|
(cond ((and (dash? byte)
|
||||||
|
;; We've found a dash. Note that we do not know
|
||||||
|
;; whether the preceeding 32 bytes are nix-base32
|
||||||
|
;; characters, but we do not need to know. If
|
||||||
|
;; they are not, the following lookup will fail.
|
||||||
|
(lookup-replacement
|
||||||
|
(string-tabulate (lambda (j)
|
||||||
|
(integer->char
|
||||||
|
(bytevector-u8-ref buffer
|
||||||
|
(+ j (- i hash-length)))))
|
||||||
|
hash-length)))
|
||||||
|
=> (lambda (replacement)
|
||||||
|
;; We've found a hash that needs to be replaced.
|
||||||
|
;; First, write out all bytes preceding the hash
|
||||||
|
;; that have not yet been written.
|
||||||
|
(put-bytevector output buffer written
|
||||||
|
(- i hash-length written))
|
||||||
|
;; Now write the replacement hash.
|
||||||
|
(put-bytevector output replacement)
|
||||||
|
;; Since the byte at position 'i' is a dash,
|
||||||
|
;; which is not a nix-base32 char, the earliest
|
||||||
|
;; position where the next hash might start is
|
||||||
|
;; i+1, and the earliest position where the
|
||||||
|
;; following dash might start is (+ i 1
|
||||||
|
;; hash-length). Also, we have now written up to
|
||||||
|
;; position 'i' in the buffer.
|
||||||
|
(scan-from (+ i 1 hash-length) i)))
|
||||||
|
;; If the byte at position 'i' is a nix-base32 char,
|
||||||
|
;; then the dash we're looking for might be as early as
|
||||||
|
;; the following byte, so we can only advance by 1.
|
||||||
|
((nix-base32-byte? byte)
|
||||||
|
(scan-from (+ i 1) written))
|
||||||
|
;; If the byte at position 'i' is NOT a nix-base32
|
||||||
|
;; char, then the earliest position where the next hash
|
||||||
|
;; might start is i+1, with the following dash at
|
||||||
|
;; position (+ i 1 hash-length).
|
||||||
|
(else
|
||||||
|
(scan-from (+ i 1 hash-length) written))))
|
||||||
|
|
||||||
|
;; We have finished scanning the buffer. Now we determine how
|
||||||
|
;; many bytes have not yet been written, and how many bytes to
|
||||||
|
;; "unget". If 'end' is less than 'request-size' then we read
|
||||||
|
;; less than we asked for, which indicates that we are at EOF,
|
||||||
|
;; so we needn't unget anything. Otherwise, we unget up to
|
||||||
|
;; 'hash-length' bytes (32 bytes). However, we must be careful
|
||||||
|
;; not to unget bytes that have already been written, because
|
||||||
|
;; that would cause them to be written again from the next
|
||||||
|
;; buffer. In practice, this case occurs when a replacement is
|
||||||
|
;; made near the end of the buffer.
|
||||||
|
(let* ((unwritten (- end written))
|
||||||
|
(unget-size (if (= end request-size)
|
||||||
|
(min hash-length unwritten)
|
||||||
|
0))
|
||||||
|
(write-size (- unwritten unget-size)))
|
||||||
|
(put-bytevector output buffer written write-size)
|
||||||
|
(unget-bytevector input buffer (+ written write-size)
|
||||||
|
unget-size)
|
||||||
|
(loop)))))))))
|
||||||
|
|
||||||
(define (rename-matching-files directory mapping)
|
(define (rename-matching-files directory mapping)
|
||||||
"Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
|
"Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
|
||||||
|
@ -122,6 +211,35 @@ an exception is caught."
|
||||||
#:optional (store (%store-directory)))
|
#:optional (store (%store-directory)))
|
||||||
"Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
|
"Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
|
||||||
file name pairs."
|
file name pairs."
|
||||||
|
|
||||||
|
(define hash-mapping
|
||||||
|
(let* ((prefix (string-append store "/"))
|
||||||
|
(start (string-length prefix))
|
||||||
|
(end (+ start hash-length)))
|
||||||
|
(define (valid-hash? h)
|
||||||
|
(every nix-base32-char? (string->list h)))
|
||||||
|
(define (valid-suffix? s)
|
||||||
|
(string-prefix? "-" s))
|
||||||
|
(define (hash+suffix s)
|
||||||
|
(and (< end (string-length s))
|
||||||
|
(let ((hash (substring s start end))
|
||||||
|
(suffix (substring s end)))
|
||||||
|
(and (string-prefix? prefix s)
|
||||||
|
(valid-hash? hash)
|
||||||
|
(valid-suffix? suffix)
|
||||||
|
(list hash suffix)))))
|
||||||
|
(map (match-lambda
|
||||||
|
(((= hash+suffix (origin-hash suffix))
|
||||||
|
.
|
||||||
|
(= hash+suffix (replacement-hash suffix)))
|
||||||
|
(cons origin-hash (string->utf8 replacement-hash)))
|
||||||
|
((origin . replacement)
|
||||||
|
(error "invalid replacement" origin replacement)))
|
||||||
|
mapping)))
|
||||||
|
|
||||||
|
(define replacement-table
|
||||||
|
(alist->vhash hash-mapping))
|
||||||
|
|
||||||
(define prefix-len
|
(define prefix-len
|
||||||
(string-length directory))
|
(string-length directory))
|
||||||
|
|
||||||
|
@ -138,18 +256,17 @@ file name pairs."
|
||||||
(symlink (call-with-output-string
|
(symlink (call-with-output-string
|
||||||
(lambda (output)
|
(lambda (output)
|
||||||
(replace-store-references (open-input-string target)
|
(replace-store-references (open-input-string target)
|
||||||
output mapping
|
output replacement-table
|
||||||
store)))
|
store)))
|
||||||
dest)))
|
dest)))
|
||||||
((regular)
|
((regular)
|
||||||
(with-fluids ((%default-port-encoding #f))
|
|
||||||
(call-with-input-file file
|
(call-with-input-file file
|
||||||
(lambda (input)
|
(lambda (input)
|
||||||
(call-with-output-file dest
|
(call-with-output-file dest
|
||||||
(lambda (output)
|
(lambda (output)
|
||||||
(replace-store-references input output mapping
|
(replace-store-references input output replacement-table
|
||||||
store)
|
store)
|
||||||
(chmod output (stat:perms stat))))))))
|
(chmod output (stat:perms stat)))))))
|
||||||
((directory)
|
((directory)
|
||||||
(mkdir-p dest))
|
(mkdir-p dest))
|
||||||
(else
|
(else
|
||||||
|
|
Loading…
Reference in New Issue