grafts: Make grafting faster.
* guix/build/graft.scm (replace-store-references): Reimplement for faster grafting. Use binary I/O instead of textual I/O. Replace 'mapping' argument (an alist) with 'replacement-table' (a vhash). (rewrite-directory): Adapt to mapping argument change in 'replace-store-references'. Remove 'with-fluids' that previously set '%default-port-encoding' to #f, since we now use binary I/O. (define-inline, hash-length): New macros. (nix-base32-char?): New variable.
This commit is contained in:
parent
ba6d25f3b9
commit
5a1add373a
|
@ -1,5 +1,6 @@
|
|||
;;; GNU Guix --- Functional package management for GNU
|
||||
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
|
||||
;;; Copyright © 2016 Mark H Weaver <mhw@netris.org>
|
||||
;;;
|
||||
;;; This file is part of GNU Guix.
|
||||
;;;
|
||||
|
@ -20,8 +21,12 @@
|
|||
#:use-module (guix build utils)
|
||||
#:use-module (rnrs bytevectors)
|
||||
#:use-module (rnrs io ports)
|
||||
#:use-module (ice-9 vlist)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 threads)
|
||||
#:use-module (ice-9 binary-ports)
|
||||
#:use-module (srfi srfi-1) ; list library
|
||||
#:use-module (srfi srfi-26) ; cut and cute
|
||||
#:export (replace-store-references
|
||||
rewrite-directory))
|
||||
|
||||
|
@ -38,50 +43,134 @@
|
|||
;;;
|
||||
;;; Code:
|
||||
|
||||
(define* (replace-store-references input output mapping
|
||||
(define-syntax-rule (define-inline name val)
|
||||
(define-syntax name (identifier-syntax val)))
|
||||
|
||||
(define-inline hash-length 32)
|
||||
|
||||
(define nix-base32-char?
|
||||
(cute char-set-contains?
|
||||
;; ASCII digits and lower case letters except e o t u
|
||||
(string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
|
||||
<>))
|
||||
|
||||
(define* (replace-store-references input output replacement-table
|
||||
#:optional (store (%store-directory)))
|
||||
"Read data from INPUT, replacing store references according to MAPPING, and
|
||||
writing the result to OUTPUT."
|
||||
(define pattern
|
||||
(let ((nix-base32-chars
|
||||
'(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
||||
#\a #\b #\c #\d #\f #\g #\h #\i #\j #\k #\l #\m #\n
|
||||
#\p #\q #\r #\s #\v #\w #\x #\y #\z)))
|
||||
`(,@(map char-set (string->list store))
|
||||
,(char-set #\/)
|
||||
,@(make-list 32 (list->char-set nix-base32-chars))
|
||||
,(char-set #\-))))
|
||||
"Read data from INPUT, replacing store references according to
|
||||
REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
|
||||
vhash that maps strings (original hashes) to bytevectors (replacement hashes).
|
||||
Note: We use string keys to work around the fact that guile-2.0 hashes all
|
||||
bytevectors to the same value."
|
||||
|
||||
;; We cannot use `regexp-exec' here because it cannot deal with strings
|
||||
;; containing NUL characters, hence 'fold-port-matches'.
|
||||
(with-fluids ((%default-port-encoding #f))
|
||||
(when (file-port? input)
|
||||
(setvbuf input _IOFBF 65536))
|
||||
(when (file-port? output)
|
||||
(setvbuf output _IOFBF 65536))
|
||||
|
||||
(let* ((len (+ 34 (string-length store)))
|
||||
(mapping (map (match-lambda
|
||||
(define (lookup-replacement s)
|
||||
(match (vhash-assoc s replacement-table)
|
||||
((origin . replacement)
|
||||
(unless (string=? (string-drop origin len)
|
||||
(string-drop replacement len))
|
||||
(error "invalid replacement" origin replacement))
|
||||
(cons (string-take origin len)
|
||||
(string-take replacement len))))
|
||||
mapping)))
|
||||
(fold-port-matches (lambda (string result)
|
||||
(match (assoc-ref mapping string)
|
||||
(#f
|
||||
(put-bytevector output (string->utf8 string)))
|
||||
((= string->utf8 replacement)
|
||||
(put-bytevector output replacement)))
|
||||
#t)
|
||||
#f
|
||||
pattern
|
||||
input
|
||||
(lambda (char result) ;unmatched
|
||||
(put-u8 output (char->integer char))
|
||||
result)))))
|
||||
replacement)
|
||||
(#f #f)))
|
||||
|
||||
(define (optimize-u8-predicate pred)
|
||||
(cute vector-ref
|
||||
(list->vector (map pred (iota 256)))
|
||||
<>))
|
||||
|
||||
(define nix-base32-byte?
|
||||
(optimize-u8-predicate
|
||||
(compose nix-base32-char?
|
||||
integer->char)))
|
||||
|
||||
(define (dash? byte) (= byte 45))
|
||||
|
||||
(define request-size (expt 2 20)) ; 1 MiB
|
||||
|
||||
;; We scan the file for the following 33-byte pattern: 32 bytes of
|
||||
;; nix-base32 characters followed by a dash. To accommodate large files,
|
||||
;; we do not read the entire file, but instead work on buffers of up to
|
||||
;; 'request-size' bytes. To ensure that every 33-byte sequence appears
|
||||
;; entirely within exactly one buffer, adjacent buffers must overlap,
|
||||
;; i.e. they must share 32 byte positions. We accomplish this by
|
||||
;; "ungetting" the last 32 bytes of each buffer before reading the next
|
||||
;; buffer, unless we know that we've reached the end-of-file.
|
||||
(let ((buffer (make-bytevector request-size)))
|
||||
(let loop ()
|
||||
;; Note: We avoid 'get-bytevector-n' to work around
|
||||
;; <http://bugs.gnu.org/17466>.
|
||||
(match (get-bytevector-n! input buffer 0 request-size)
|
||||
((? eof-object?) 'done)
|
||||
(end
|
||||
;; We scan the buffer for dashes that might be preceded by a
|
||||
;; nix-base32 hash. The key optimization here is that whenever we
|
||||
;; find a NON-nix-base32 character at position 'i', we know that it
|
||||
;; cannot be part of a hash, so the earliest position where the next
|
||||
;; hash could start is i+1 with the following dash at position i+33.
|
||||
;;
|
||||
;; Since nix-base32 characters comprise only 1/8 of the 256 possible
|
||||
;; byte values, and exclude some of the most common letters in
|
||||
;; English text (e t o u), in practice we can advance by 33 positions
|
||||
;; most of the time.
|
||||
(let scan-from ((i hash-length) (written 0))
|
||||
;; 'i' is the first position where we look for a dash. 'written'
|
||||
;; is the number of bytes in the buffer that have already been
|
||||
;; written.
|
||||
(if (< i end)
|
||||
(let ((byte (bytevector-u8-ref buffer i)))
|
||||
(cond ((and (dash? byte)
|
||||
;; We've found a dash. Note that we do not know
|
||||
;; whether the preceeding 32 bytes are nix-base32
|
||||
;; characters, but we do not need to know. If
|
||||
;; they are not, the following lookup will fail.
|
||||
(lookup-replacement
|
||||
(string-tabulate (lambda (j)
|
||||
(integer->char
|
||||
(bytevector-u8-ref buffer
|
||||
(+ j (- i hash-length)))))
|
||||
hash-length)))
|
||||
=> (lambda (replacement)
|
||||
;; We've found a hash that needs to be replaced.
|
||||
;; First, write out all bytes preceding the hash
|
||||
;; that have not yet been written.
|
||||
(put-bytevector output buffer written
|
||||
(- i hash-length written))
|
||||
;; Now write the replacement hash.
|
||||
(put-bytevector output replacement)
|
||||
;; Since the byte at position 'i' is a dash,
|
||||
;; which is not a nix-base32 char, the earliest
|
||||
;; position where the next hash might start is
|
||||
;; i+1, and the earliest position where the
|
||||
;; following dash might start is (+ i 1
|
||||
;; hash-length). Also, we have now written up to
|
||||
;; position 'i' in the buffer.
|
||||
(scan-from (+ i 1 hash-length) i)))
|
||||
;; If the byte at position 'i' is a nix-base32 char,
|
||||
;; then the dash we're looking for might be as early as
|
||||
;; the following byte, so we can only advance by 1.
|
||||
((nix-base32-byte? byte)
|
||||
(scan-from (+ i 1) written))
|
||||
;; If the byte at position 'i' is NOT a nix-base32
|
||||
;; char, then the earliest position where the next hash
|
||||
;; might start is i+1, with the following dash at
|
||||
;; position (+ i 1 hash-length).
|
||||
(else
|
||||
(scan-from (+ i 1 hash-length) written))))
|
||||
|
||||
;; We have finished scanning the buffer. Now we determine how
|
||||
;; many bytes have not yet been written, and how many bytes to
|
||||
;; "unget". If 'end' is less than 'request-size' then we read
|
||||
;; less than we asked for, which indicates that we are at EOF,
|
||||
;; so we needn't unget anything. Otherwise, we unget up to
|
||||
;; 'hash-length' bytes (32 bytes). However, we must be careful
|
||||
;; not to unget bytes that have already been written, because
|
||||
;; that would cause them to be written again from the next
|
||||
;; buffer. In practice, this case occurs when a replacement is
|
||||
;; made near the end of the buffer.
|
||||
(let* ((unwritten (- end written))
|
||||
(unget-size (if (= end request-size)
|
||||
(min hash-length unwritten)
|
||||
0))
|
||||
(write-size (- unwritten unget-size)))
|
||||
(put-bytevector output buffer written write-size)
|
||||
(unget-bytevector input buffer (+ written write-size)
|
||||
unget-size)
|
||||
(loop)))))))))
|
||||
|
||||
(define (rename-matching-files directory mapping)
|
||||
"Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
|
||||
|
@ -122,6 +211,35 @@ an exception is caught."
|
|||
#:optional (store (%store-directory)))
|
||||
"Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
|
||||
file name pairs."
|
||||
|
||||
(define hash-mapping
|
||||
(let* ((prefix (string-append store "/"))
|
||||
(start (string-length prefix))
|
||||
(end (+ start hash-length)))
|
||||
(define (valid-hash? h)
|
||||
(every nix-base32-char? (string->list h)))
|
||||
(define (valid-suffix? s)
|
||||
(string-prefix? "-" s))
|
||||
(define (hash+suffix s)
|
||||
(and (< end (string-length s))
|
||||
(let ((hash (substring s start end))
|
||||
(suffix (substring s end)))
|
||||
(and (string-prefix? prefix s)
|
||||
(valid-hash? hash)
|
||||
(valid-suffix? suffix)
|
||||
(list hash suffix)))))
|
||||
(map (match-lambda
|
||||
(((= hash+suffix (origin-hash suffix))
|
||||
.
|
||||
(= hash+suffix (replacement-hash suffix)))
|
||||
(cons origin-hash (string->utf8 replacement-hash)))
|
||||
((origin . replacement)
|
||||
(error "invalid replacement" origin replacement)))
|
||||
mapping)))
|
||||
|
||||
(define replacement-table
|
||||
(alist->vhash hash-mapping))
|
||||
|
||||
(define prefix-len
|
||||
(string-length directory))
|
||||
|
||||
|
@ -138,18 +256,17 @@ file name pairs."
|
|||
(symlink (call-with-output-string
|
||||
(lambda (output)
|
||||
(replace-store-references (open-input-string target)
|
||||
output mapping
|
||||
output replacement-table
|
||||
store)))
|
||||
dest)))
|
||||
((regular)
|
||||
(with-fluids ((%default-port-encoding #f))
|
||||
(call-with-input-file file
|
||||
(lambda (input)
|
||||
(call-with-output-file dest
|
||||
(lambda (output)
|
||||
(replace-store-references input output mapping
|
||||
(replace-store-references input output replacement-table
|
||||
store)
|
||||
(chmod output (stat:perms stat))))))))
|
||||
(chmod output (stat:perms stat)))))))
|
||||
((directory)
|
||||
(mkdir-p dest))
|
||||
(else
|
||||
|
|
Loading…
Reference in New Issue