offload: Decompose 'machine-load' into simpler procedures.

* guix/scripts/offload.scm (machine-load): Remove.
(node-load, normalized-load): New procedures.
(choose-build-machine): Call 'open-ssh-session' and 'make-node' from
here; pass the node to 'node-load'.
(check-machine-status): Use 'node-load' instead of 'machine-load'.  Call
'disconnect!' on SESSION.
This commit is contained in:
Ludovic Courtès 2018-12-21 22:54:02 +01:00
parent 295430f0cf
commit bbe66a530a
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
1 changed files with 46 additions and 42 deletions

View File

@ -392,33 +392,31 @@ MACHINE."
(build-requirements-features requirements) (build-requirements-features requirements)
(build-machine-features machine)))) (build-machine-features machine))))
(define (machine-load machine) (define (node-load node)
"Return the load of MACHINE, divided by the number of parallel builds "Return the load on NODE. Return +∞ if NODE is misbehaving."
allowed on MACHINE. Return + if MACHINE is unreachable." (let ((line (node-eval node
;; Note: This procedure is costly since it creates a new SSH session. '(begin
(match (false-if-exception (open-ssh-session machine)) (use-modules (ice-9 rdelim))
((? session? session) (call-with-input-file "/proc/loadavg"
(let* ((pipe (open-remote-pipe* session OPEN_READ read-string)))))
"cat" "/proc/loadavg")) (if (eof-object? line)
(line (read-line pipe))) +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
(close-port pipe) (match (string-tokenize line)
(disconnect! session) ((one five fifteen . x)
(string->number one))
(x
+inf.0)))))
(if (eof-object? line) (define (normalized-load machine load)
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded "Divide LOAD by the number of parallel builds of MACHINE."
(match (string-tokenize line) (if (rational? load)
((one five fifteen . x) (let* ((jobs (build-machine-parallel-builds machine))
(let* ((raw (string->number one)) (normalized (/ load jobs)))
(jobs (build-machine-parallel-builds machine)) (format (current-error-port) "load on machine '~a' is ~s\
(normalized (/ raw jobs)))
(format (current-error-port) "load on machine '~a' is ~s\
(normalized: ~s)~%" (normalized: ~s)~%"
(build-machine-name machine) raw normalized) (build-machine-name machine) load normalized)
normalized)) normalized)
(x load))
+inf.0))))) ;something's fishy about MACHINE, so avoid it
(x
+inf.0))) ;failed to connect to MACHINE, so avoid it
(define (machine-lock-file machine hint) (define (machine-lock-file machine hint)
"Return the name of MACHINE's lock file for HINT." "Return the name of MACHINE's lock file for HINT."
@ -484,21 +482,25 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
(match machines+slots (match machines+slots
(((best slot) others ...) (((best slot) others ...)
;; Return the best machine unless it's already overloaded. ;; Return the best machine unless it's already overloaded.
;; Note: We call 'machine-load' only as a last resort because it is ;; Note: We call 'node-load' only as a last resort because it is
;; too costly to call it once for every machine. ;; too costly to call it once for every machine.
(if (< (machine-load best) 2.) (let* ((session (false-if-exception (open-ssh-session best)))
(match others (node (and session (make-node session)))
(((machines slots) ...) (load (and node (normalized-load best (node-load node)))))
;; Release slots from the uninteresting machines. (when session (disconnect! session))
(for-each release-build-slot slots) (if (and node (< load 2.))
(match others
(((machines slots) ...)
;; Release slots from the uninteresting machines.
(for-each release-build-slot slots)
;; The caller must keep SLOT to protect it from GC and to ;; The caller must keep SLOT to protect it from GC and to
;; eventually release it. ;; eventually release it.
(values best slot))) (values best slot)))
(begin (begin
;; BEST is overloaded, so try the next one. ;; BEST is overloaded, so try the next one.
(release-build-slot slot) (release-build-slot slot)
(loop others)))) (loop others)))))
(() (()
(values #f #f)))))) (values #f #f))))))
@ -689,16 +691,18 @@ machine."
(info (G_ "getting status of ~a build machines defined in '~a'...~%") (info (G_ "getting status of ~a build machines defined in '~a'...~%")
(length machines) machine-file) (length machines) machine-file)
(for-each (lambda (machine) (for-each (lambda (machine)
(let* ((node (make-node (open-ssh-session machine))) (let* ((session (open-ssh-session machine))
(uts (node-eval node '(uname)))) (node (make-node session))
(uts (node-eval node '(uname)))
(load (node-load node)))
(disconnect! session)
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
host name: ~a~% normalized load: ~a~%" host name: ~a~% normalized load: ~a~%"
(build-machine-name machine) (build-machine-name machine)
(utsname:sysname uts) (utsname:release uts) (utsname:sysname uts) (utsname:release uts)
(utsname:machine uts) (utsname:machine uts)
(utsname:nodename uts) (utsname:nodename uts)
(parameterize ((current-error-port (%make-void-port "rw+"))) load)))
(machine-load machine)))))
machines))) machines)))