From ce72c780746776a86f59747f5eff8731cb4ff39b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Thu, 5 Mar 2015 22:00:11 +0100 Subject: [PATCH] store: Attempt to decode build logs as UTF-8. * guix/serialization.scm (read-maybe-utf8-string): New procedure. * guix/store.scm (process-stderr): Use it for the build log and errors. * tests/store.scm ("current-build-output-port, UTF-8", "current-build-output-port, UTF-8 + garbage"): New tests. --- guix/serialization.scm | 18 +++++++++++++++++- guix/store.scm | 9 ++++++--- tests/store.scm | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/guix/serialization.scm b/guix/serialization.scm index da01ff39f5..a99f53ee0b 100644 --- a/guix/serialization.scm +++ b/guix/serialization.scm @@ -29,7 +29,8 @@ #:export (write-int read-int write-long-long read-long-long write-padding - write-string read-string read-latin1-string + write-string + read-string read-latin1-string read-maybe-utf8-string write-string-list read-string-list write-string-pairs write-store-path read-store-path @@ -130,6 +131,21 @@ ;; upgraded to Guile >= 2.0.9. (list->string (map integer->char (bytevector->u8-list bv))))) +(define (read-maybe-utf8-string p) + "Read a serialized string from port P. Attempt to decode it as UTF-8 and +substitute invalid byte sequences with question marks. This is a +\"permissive\" UTF-8 decoder." + ;; XXX: We rely on the port's decoding mechanism to do permissive decoding + ;; and substitute invalid byte sequences with question marks, but this is + ;; not very efficient. Eventually Guile may provide a lightweight + ;; permissive UTF-8 decoder. + (let* ((bv (read-byte-string p)) + (port (with-fluids ((%default-port-encoding "UTF-8") + (%default-port-conversion-strategy + 'substitute)) + (open-bytevector-input-port bv)))) + (get-string-all port))) + (define (write-string-list l p) (write-int (length l) p) (for-each (cut write-string <> p) l)) diff --git a/guix/store.scm b/guix/store.scm index d88fb3ea54..a3f3cbf43b 100644 --- a/guix/store.scm +++ b/guix/store.scm @@ -418,15 +418,18 @@ encoding conversion errors." (write-padding len p) #f)) ((= k %stderr-next) - ;; Log a string. - (let ((s (read-latin1-string p))) + ;; Log a string. Build logs are usually UTF-8-encoded, but they + ;; may also contain arbitrary byte sequences that should not cause + ;; this to fail. Thus, use the permissive + ;; 'read-maybe-utf8-string'. + (let ((s (read-maybe-utf8-string p))) (display s (current-build-output-port)) (when (string-any %newlines s) (flush-output-port (current-build-output-port))) #f)) ((= k %stderr-error) ;; Report an error. - (let ((error (read-latin1-string p)) + (let ((error (read-maybe-utf8-string p)) ;; Currently the daemon fails to send a status code for early ;; errors like DB schema version mismatches, so check for EOF. (status (if (and (>= (nix-server-minor-version server) 8) diff --git a/tests/store.scm b/tests/store.scm index ee783be846..9ed78be085 100644 --- a/tests/store.scm +++ b/tests/store.scm @@ -25,6 +25,7 @@ #:use-module (guix packages) #:use-module (guix derivations) #:use-module (guix serialization) + #:use-module (guix gexp) #:use-module (gnu packages) #:use-module (gnu packages bootstrap) #:use-module (ice-9 match) @@ -268,6 +269,42 @@ (list a b c d w x y))) (lset= string=? s1 s3))))) +(test-assert "current-build-output-port, UTF-8" + ;; Are UTF-8 strings in the build log properly interpreted? + (string-contains + (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port + (call-with-output-string + (lambda (port) + (parameterize ((current-build-output-port port)) + (let* ((s "Here’s a Greek letter: λ.") + (d (build-expression->derivation + %store "foo" `(display ,s) + #:guile-for-build + (package-derivation s %bootstrap-guile (%current-system))))) + (guard (c ((nix-protocol-error? c) #t)) + (build-derivations %store (list d)))))))) + "Here’s a Greek letter: λ.")) + +(test-assert "current-build-output-port, UTF-8 + garbage" + ;; What about a mixture of UTF-8 + garbage? + (string-contains + (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port + (call-with-output-string + (lambda (port) + (parameterize ((current-build-output-port port)) + (let ((d (build-expression->derivation + %store "foo" + `(begin + (use-modules (rnrs io ports)) + (display "garbage: ") + (put-bytevector (current-output-port) #vu8(128)) + (display "lambda: λ\n")) + #:guile-for-build + (package-derivation %store %bootstrap-guile)))) + (guard (c ((nix-protocol-error? c) #t)) + (build-derivations %store (list d)))))))) + "garbage: ?lambda: λ")) + (test-assert "log-file, derivation" (let* ((b (add-text-to-store %store "build" "echo $foo > $out" '())) (s (add-to-store %store "bash" #t "sha256"