status: Gracefully handle invalid UTF-8 in build logs.

* guix/status.scm (maybe-utf8->string): New procedure.
(build-event-output-port): Use it in lieu of 'utf8->string'.
* tests/status.scm ("build-output-port, UTF-8")
("current-build-output-port, UTF-8 + garbage"): New tests.
This commit is contained in:
Ludovic Courtès 2018-10-09 09:53:02 +02:00
parent 276f368051
commit fe17037b38
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 39 additions and 2 deletions

View File

@ -34,6 +34,7 @@
#:use-module (ice-9 match) #:use-module (ice-9 match)
#:use-module (ice-9 format) #:use-module (ice-9 format)
#:use-module (ice-9 binary-ports) #:use-module (ice-9 binary-ports)
#:autoload (ice-9 rdelim) (read-string)
#:use-module (rnrs bytevectors) #:use-module (rnrs bytevectors)
#:use-module ((system foreign) #:use-module ((system foreign)
#:select (bytevector->pointer pointer->bytevector)) #:select (bytevector->pointer pointer->bytevector))
@ -429,6 +430,22 @@ ON-CHANGE can display the build status, build events, etc."
(define %newline (define %newline
(char-set #\return #\newline)) (char-set #\return #\newline))
(define (maybe-utf8->string bv)
"Attempt to decode BV as UTF-8 string and return it. Gracefully handle the
case where BV does not contain only valid UTF-8."
(catch 'decoding-error
(lambda ()
(utf8->string bv))
(lambda _
;; This is the sledgehammer but it's the only safe way we have to
;; properly handle this. It's expensive but it's rarely needed.
(let ((port (open-bytevector-input-port bv)))
(set-port-encoding! port "UTF-8")
(set-port-conversion-strategy! port 'substitute)
(let ((str (read-string port)))
(close-port port)
str)))))
(define* (build-event-output-port proc #:optional (seed (build-status))) (define* (build-event-output-port proc #:optional (seed (build-status)))
"Return an output port for use as 'current-build-output-port' that calls "Return an output port for use as 'current-build-output-port' that calls
PROC with its current state value, initialized with SEED, on every build PROC with its current state value, initialized with SEED, on every build
@ -464,7 +481,7 @@ The second return value is a thunk to retrieve the current state."
(pointer->bytevector ptr count))) (pointer->bytevector ptr count)))
(define (write! bv offset count) (define (write! bv offset count)
(let loop ((str (utf8->string (bytevector-range bv offset count)))) (let loop ((str (maybe-utf8->string (bytevector-range bv offset count))))
(match (string-index str %newline) (match (string-index str %newline)
((? integer? cr) ((? integer? cr)
(let ((tail (string-take str (+ 1 cr)))) (let ((tail (string-take str (+ 1 cr))))

View File

@ -20,7 +20,9 @@
#:use-module (guix status) #:use-module (guix status)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (srfi srfi-64)) #:use-module (srfi srfi-64)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports))
(test-begin "status") (test-begin "status")
@ -112,4 +114,22 @@
(display "@ substituter-succeeded baz\n" port) (display "@ substituter-succeeded baz\n" port)
(list first (get-status))))) (list first (get-status)))))
(test-equal "build-output-port, UTF-8"
'((build-log "lambda is λ!\n"))
(let-values (((port get-status) (build-event-output-port cons '()))
((bv) (string->utf8 "lambda is λ!\n")))
(put-bytevector port bv)
(force-output port)
(get-status)))
(test-equal "current-build-output-port, UTF-8 + garbage"
;; What about a mixture of UTF-8 + garbage?
'((build-log "garbage: <20>lambda: λ\n"))
(let-values (((port get-status) (build-event-output-port cons '())))
(display "garbage: " port)
(put-bytevector port #vu8(128))
(put-bytevector port (string->utf8 "lambda: λ\n"))
(force-output port)
(get-status)))
(test-end "status") (test-end "status")