grafts: Make grafting faster.

* guix/build/graft.scm (replace-store-references): Reimplement for
faster grafting.  Use binary I/O instead of textual I/O.  Replace
'mapping' argument (an alist) with 'replacement-table' (a vhash).
(rewrite-directory): Adapt to mapping argument change in
'replace-store-references'.  Remove 'with-fluids' that previously set
'%default-port-encoding' to #f, since we now use binary I/O.
(define-inline, hash-length): New macros.
(nix-base32-char?): New variable.
This commit is contained in:
Mark H Weaver 2016-03-09 01:23:53 -05:00
parent ba6d25f3b9
commit 5a1add373a
No known key found for this signature in database
GPG Key ID: 7CEF29847562C516
1 changed files with 167 additions and 50 deletions

View File

@ -1,5 +1,6 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2016 Mark H Weaver <mhw@netris.org>
;;;
;;; This file is part of GNU Guix.
;;;
@ -20,8 +21,12 @@
#:use-module (guix build utils)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module (ice-9 vlist)
#:use-module (ice-9 match)
#:use-module (ice-9 threads)
#:use-module (ice-9 binary-ports)
#:use-module (srfi srfi-1) ; list library
#:use-module (srfi srfi-26) ; cut and cute
#:export (replace-store-references
rewrite-directory))
@ -38,50 +43,134 @@
;;;
;;; Code:
(define* (replace-store-references input output mapping
(define-syntax-rule (define-inline name val)
(define-syntax name (identifier-syntax val)))
(define-inline hash-length 32)
(define nix-base32-char?
(cute char-set-contains?
;; ASCII digits and lower case letters except e o t u
(string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
<>))
(define* (replace-store-references input output replacement-table
#:optional (store (%store-directory)))
"Read data from INPUT, replacing store references according to MAPPING, and
writing the result to OUTPUT."
(define pattern
(let ((nix-base32-chars
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\f #\g #\h #\i #\j #\k #\l #\m #\n
#\p #\q #\r #\s #\v #\w #\x #\y #\z)))
`(,@(map char-set (string->list store))
,(char-set #\/)
,@(make-list 32 (list->char-set nix-base32-chars))
,(char-set #\-))))
"Read data from INPUT, replacing store references according to
REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
vhash that maps strings (original hashes) to bytevectors (replacement hashes).
Note: We use string keys to work around the fact that guile-2.0 hashes all
bytevectors to the same value."
;; We cannot use `regexp-exec' here because it cannot deal with strings
;; containing NUL characters, hence 'fold-port-matches'.
(with-fluids ((%default-port-encoding #f))
(when (file-port? input)
(setvbuf input _IOFBF 65536))
(when (file-port? output)
(setvbuf output _IOFBF 65536))
(define (lookup-replacement s)
(match (vhash-assoc s replacement-table)
((origin . replacement)
replacement)
(#f #f)))
(let* ((len (+ 34 (string-length store)))
(mapping (map (match-lambda
((origin . replacement)
(unless (string=? (string-drop origin len)
(string-drop replacement len))
(error "invalid replacement" origin replacement))
(cons (string-take origin len)
(string-take replacement len))))
mapping)))
(fold-port-matches (lambda (string result)
(match (assoc-ref mapping string)
(#f
(put-bytevector output (string->utf8 string)))
((= string->utf8 replacement)
(put-bytevector output replacement)))
#t)
#f
pattern
input
(lambda (char result) ;unmatched
(put-u8 output (char->integer char))
result)))))
(define (optimize-u8-predicate pred)
(cute vector-ref
(list->vector (map pred (iota 256)))
<>))
(define nix-base32-byte?
(optimize-u8-predicate
(compose nix-base32-char?
integer->char)))
(define (dash? byte) (= byte 45))
(define request-size (expt 2 20)) ; 1 MiB
;; We scan the file for the following 33-byte pattern: 32 bytes of
;; nix-base32 characters followed by a dash. To accommodate large files,
;; we do not read the entire file, but instead work on buffers of up to
;; 'request-size' bytes. To ensure that every 33-byte sequence appears
;; entirely within exactly one buffer, adjacent buffers must overlap,
;; i.e. they must share 32 byte positions. We accomplish this by
;; "ungetting" the last 32 bytes of each buffer before reading the next
;; buffer, unless we know that we've reached the end-of-file.
(let ((buffer (make-bytevector request-size)))
(let loop ()
;; Note: We avoid 'get-bytevector-n' to work around
;; <http://bugs.gnu.org/17466>.
(match (get-bytevector-n! input buffer 0 request-size)
((? eof-object?) 'done)
(end
;; We scan the buffer for dashes that might be preceded by a
;; nix-base32 hash. The key optimization here is that whenever we
;; find a NON-nix-base32 character at position 'i', we know that it
;; cannot be part of a hash, so the earliest position where the next
;; hash could start is i+1 with the following dash at position i+33.
;;
;; Since nix-base32 characters comprise only 1/8 of the 256 possible
;; byte values, and exclude some of the most common letters in
;; English text (e t o u), in practice we can advance by 33 positions
;; most of the time.
(let scan-from ((i hash-length) (written 0))
;; 'i' is the first position where we look for a dash. 'written'
;; is the number of bytes in the buffer that have already been
;; written.
(if (< i end)
(let ((byte (bytevector-u8-ref buffer i)))
(cond ((and (dash? byte)
;; We've found a dash. Note that we do not know
;; whether the preceeding 32 bytes are nix-base32
;; characters, but we do not need to know. If
;; they are not, the following lookup will fail.
(lookup-replacement
(string-tabulate (lambda (j)
(integer->char
(bytevector-u8-ref buffer
(+ j (- i hash-length)))))
hash-length)))
=> (lambda (replacement)
;; We've found a hash that needs to be replaced.
;; First, write out all bytes preceding the hash
;; that have not yet been written.
(put-bytevector output buffer written
(- i hash-length written))
;; Now write the replacement hash.
(put-bytevector output replacement)
;; Since the byte at position 'i' is a dash,
;; which is not a nix-base32 char, the earliest
;; position where the next hash might start is
;; i+1, and the earliest position where the
;; following dash might start is (+ i 1
;; hash-length). Also, we have now written up to
;; position 'i' in the buffer.
(scan-from (+ i 1 hash-length) i)))
;; If the byte at position 'i' is a nix-base32 char,
;; then the dash we're looking for might be as early as
;; the following byte, so we can only advance by 1.
((nix-base32-byte? byte)
(scan-from (+ i 1) written))
;; If the byte at position 'i' is NOT a nix-base32
;; char, then the earliest position where the next hash
;; might start is i+1, with the following dash at
;; position (+ i 1 hash-length).
(else
(scan-from (+ i 1 hash-length) written))))
;; We have finished scanning the buffer. Now we determine how
;; many bytes have not yet been written, and how many bytes to
;; "unget". If 'end' is less than 'request-size' then we read
;; less than we asked for, which indicates that we are at EOF,
;; so we needn't unget anything. Otherwise, we unget up to
;; 'hash-length' bytes (32 bytes). However, we must be careful
;; not to unget bytes that have already been written, because
;; that would cause them to be written again from the next
;; buffer. In practice, this case occurs when a replacement is
;; made near the end of the buffer.
(let* ((unwritten (- end written))
(unget-size (if (= end request-size)
(min hash-length unwritten)
0))
(write-size (- unwritten unget-size)))
(put-bytevector output buffer written write-size)
(unget-bytevector input buffer (+ written write-size)
unget-size)
(loop)))))))))
(define (rename-matching-files directory mapping)
"Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
@ -122,6 +211,35 @@ an exception is caught."
#:optional (store (%store-directory)))
"Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
file name pairs."
(define hash-mapping
(let* ((prefix (string-append store "/"))
(start (string-length prefix))
(end (+ start hash-length)))
(define (valid-hash? h)
(every nix-base32-char? (string->list h)))
(define (valid-suffix? s)
(string-prefix? "-" s))
(define (hash+suffix s)
(and (< end (string-length s))
(let ((hash (substring s start end))
(suffix (substring s end)))
(and (string-prefix? prefix s)
(valid-hash? hash)
(valid-suffix? suffix)
(list hash suffix)))))
(map (match-lambda
(((= hash+suffix (origin-hash suffix))
.
(= hash+suffix (replacement-hash suffix)))
(cons origin-hash (string->utf8 replacement-hash)))
((origin . replacement)
(error "invalid replacement" origin replacement)))
mapping)))
(define replacement-table
(alist->vhash hash-mapping))
(define prefix-len
(string-length directory))
@ -138,18 +256,17 @@ file name pairs."
(symlink (call-with-output-string
(lambda (output)
(replace-store-references (open-input-string target)
output mapping
output replacement-table
store)))
dest)))
((regular)
(with-fluids ((%default-port-encoding #f))
(call-with-input-file file
(lambda (input)
(call-with-output-file dest
(lambda (output)
(replace-store-references input output mapping
store)
(chmod output (stat:perms stat))))))))
(call-with-input-file file
(lambda (input)
(call-with-output-file dest
(lambda (output)
(replace-store-references input output replacement-table
store)
(chmod output (stat:perms stat)))))))
((directory)
(mkdir-p dest))
(else