;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015, 2016, 2017, 2018 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (guix cve)
  #:use-module (guix utils)
  #:use-module (guix http-client)
  #:use-module (sxml ssax)
  #:use-module (web uri)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-11)
  #:use-module (srfi srfi-19)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 match)
  #:use-module (ice-9 regex)
  #:use-module (ice-9 vlist)
  #:export (vulnerability?
            vulnerability-id
            vulnerability-packages

            xml->vulnerabilities
            current-vulnerabilities
            vulnerabilities->lookup-proc))

;;; Commentary:
;;;
;;; This modules provides the tools to fetch, parse, and digest part of the
;;; Common Vulnerabilities and Exposures (CVE) feeds provided by the US NIST
;;; at <https://nvd.nist.gov/download.cfm#CVE_FEED>.
;;;
;;; Code:

(define-record-type <vulnerability>
  (vulnerability id packages)
  vulnerability?
  (id         vulnerability-id)                   ;string
  (packages   vulnerability-packages))            ;((p1 v1 v2 v3) (p2 v1) ...)

(define %now
  (current-date))
(define %current-year
  (date-year %now))
(define %past-year
  (- %current-year 1))

(define (yearly-feed-uri year)
  "Return the URI for the CVE feed for YEAR."
  (string->uri
   (string-append "https://nvd.nist.gov/feeds/xml/cve/nvdcve-2.0-"
                  (number->string year) ".xml.gz")))

(define %current-year-ttl
  ;; According to <https://nvd.nist.gov/download.cfm#CVE_FEED>, feeds are
  ;; updated "approximately every two hours."
  (* 60 30))

(define %past-year-ttl
  ;; Update the previous year's database more and more infrequently.
  (* 3600 24 (date-month %now)))

(define %cpe-package-rx
  ;; For applications: "cpe:/a:VENDOR:PACKAGE:VERSION", or sometimes
  ;; "cpe/a:VENDOR:PACKAGE:VERSION:PATCH-LEVEL".
  (make-regexp "^cpe:/a:([^:]+):([^:]+):([^:]+)((:.+)?)"))

(define (cpe->package-name cpe)
  "Converts the Common Platform Enumeration (CPE) string CPE to a package
name, in a very naive way.  Return two values: the package name, and its
version string.  Return #f and #f if CPE does not look like an application CPE
string."
  (cond ((regexp-exec %cpe-package-rx (string-trim-both cpe))
         =>
         (lambda (matches)
           (values (match:substring matches 2)
                   (string-append (match:substring matches 3)
                                  (match (match:substring matches 4)
                                    ("" "")
                                    (patch-level
                                     ;; Drop the colon from things like
                                     ;; "cpe:/a:openbsd:openssh:6.8:p1".
                                     (string-drop patch-level 1)))))))
        (else
         (values #f #f))))

(define (cpe->product-alist products)
  "Given PRODUCTS, a list of CPE names, return the subset limited to the
applications listed in PRODUCTS, with names converted to package names:

  (cpe->product-alist
    '(\"cpe:/a:gnu:libtasn1:4.7\" \"cpe:/a:gnu:libtasn1:4.6\" \"cpe:/a:gnu:cpio:2.11\"))
  => ((\"libtasn1\" \"4.7\" \"4.6\") (\"cpio\" \"2.11\"))
"
  (fold (lambda (product result)
          (let-values (((name version) (cpe->package-name product)))
            (if name
                (match result
                  (((previous . versions) . tail)
                   ;; Attempt to coalesce NAME and PREVIOUS.
                   (if (string=? name previous)
                       (alist-cons name (cons version versions) tail)
                       (alist-cons name (list version) result)))
                  (()
                   (alist-cons name (list version) result)))
                result)))
        '()
        (sort products string<?)))

(define %parse-vulnerability-feed
  ;; Parse the XML vulnerability feed from
  ;; <https://nvd.nist.gov/download.cfm#CVE_FEED> and return a list of
  ;; vulnerability objects.
  (ssax:make-parser NEW-LEVEL-SEED
                    (lambda (elem-gi attributes namespaces expected-content
                                     seed)
                      (match elem-gi
                        ((name-space . 'entry)
                         (cons (assoc-ref attributes 'id) seed))
                        ((name-space . 'vulnerable-software-list)
                         (cons '() seed))
                        ((name-space . 'product)
                         (cons 'product seed))
                        (x seed)))

                    FINISH-ELEMENT
                    (lambda (elem-gi attributes namespaces parent-seed
                                     seed)
                      (match elem-gi
                        ((name-space . 'entry)
                         (match seed
                           (((? string? id) . rest)
                            ;; Some entries have no vulnerable-software-list.
                            rest)
                           ((products id . rest)
                            (match (cpe->product-alist products)
                              (()
                               ;; No application among PRODUCTS.
                               rest)
                              (packages
                               (cons (vulnerability id packages)
                                     rest))))))
                        (x
                         seed)))

                    CHAR-DATA-HANDLER
                    (lambda (str _ seed)
                      (match seed
                        (('product software-list . rest)
                         ;; Add STR to the vulnerable software list this
                         ;; <product> tag is part of.
                         (cons (cons str software-list) rest))
                        (x x)))))

(define (xml->vulnerabilities port)
  "Read from PORT an XML feed of vulnerabilities and return a list of
vulnerability objects."
  (reverse (%parse-vulnerability-feed port '())))

(define vulnerability->sexp
  (match-lambda
    (($ <vulnerability> id packages)
     `(v ,id ,packages))))

(define sexp->vulnerability
  (match-lambda
    (('v id (packages ...))
     (vulnerability id packages))))

(define (write-cache input cache)
  "Read vulnerabilities as gzipped XML from INPUT, and write it as a compact
sexp to CACHE."
  (call-with-decompressed-port 'gzip input
    (lambda (input)
      ;; XXX: The SSAX "error port" is used to send pointless warnings such as
      ;; "warning: Skipping PI".  Turn that off.
      (define vulns
        (parameterize ((current-ssax-error-port (%make-void-port "w")))
          (xml->vulnerabilities input)))

      (write `(vulnerabilities
               1                                  ;format version
               ,(map vulnerability->sexp vulns))
             cache))))

(define (fetch-vulnerabilities year ttl)
  "Return the list of <vulnerability> for YEAR, assuming the on-disk cache has
the given TTL (fetch from the NIST web site when TTL has expired)."
  (define (cache-miss uri)
    (format (current-error-port) "fetching CVE database for ~a...~%" year))

  (define (read* port)
    ;; Disable read options to avoid populating the source property weak
    ;; table, which speeds things up, saves memory, and works around
    ;; <https://lists.gnu.org/archive/html/guile-devel/2017-09/msg00031.html>.
    (let ((options (read-options)))
      (dynamic-wind
        (lambda ()
          (read-disable 'positions))
        (lambda ()
          (read port))
        (lambda ()
          (read-options options)))))

  ;; Note: We used to keep the original XML files in cache but parsing it
  ;; would take typically ~15s for a year of data.  Thus, we instead store a
  ;; summarized version thereof as an sexp, which can be parsed in 1s or so.
  (let* ((port (http-fetch/cached (yearly-feed-uri year)
                                  #:ttl ttl
                                  #:write-cache write-cache
                                  #:cache-miss cache-miss))
         (sexp (read* port)))
    (close-port port)
    (match sexp
      (('vulnerabilities 1 vulns)
       (map sexp->vulnerability vulns)))))

(define (current-vulnerabilities)
  "Return the current list of Common Vulnerabilities and Exposures (CVE) as
published by the US NIST."
  (let ((past-years (unfold (cut > <> 3)
                            (lambda (n)
                              (- %current-year n))
                            1+
                            1))
        (past-ttls  (unfold (cut > <> 3)
                            (lambda (n)
                              (* n %past-year-ttl))
                            1+
                            1)))
    (append-map fetch-vulnerabilities
                (cons %current-year past-years)
                (cons %current-year-ttl past-ttls))))

(define (vulnerabilities->lookup-proc vulnerabilities)
  "Return a lookup procedure built from VULNERABILITIES that takes a package
name and optionally a version number.  When the version is omitted, the lookup
procedure returns a list of vulnerabilities; otherwise, it returns a list of
vulnerabilities affecting the given package version."
  (define table
    ;; Map package names to lists of version/vulnerability pairs.
    (fold (lambda (vuln table)
            (match vuln
              (($ <vulnerability> id packages)
               (fold (lambda (package table)
                       (match package
                         ((name . versions)
                          (vhash-cons name (cons vuln versions)
                                      table))))
                     table
                     packages))))
          vlist-null
          vulnerabilities))

  (lambda* (package #:optional version)
    (vhash-fold* (if version
                     (lambda (pair result)
                       (match pair
                         ((vuln . versions)
                          (if (member version versions)
                              (cons vuln result)
                              result))))
                     (lambda (pair result)
                       (match pair
                         ((vuln . _)
                          (cons vuln result)))))
                 '()
                 package table)))


;;; cve.scm ends here