Package: guix-patches;
Reported by: Danny Milosavljevic <dannym <at> friendly-machines.com>
Date: Thu, 24 Apr 2025 23:04:02 UTC
Severity: normal
To reply to this bug, email your comments to 78051 AT debbugs.gnu.org.
Toggle the display of automated, internal messages from the tracker.
View this report as an mbox folder, status mbox, maintainer mbox
guix-patches <at> gnu.org
:bug#78051
; Package guix-patches
.
(Thu, 24 Apr 2025 23:04:02 GMT) Full text and rfc822 format available.Danny Milosavljevic <dannym <at> friendly-machines.com>
:guix-patches <at> gnu.org
.
(Thu, 24 Apr 2025 23:04:02 GMT) Full text and rfc822 format available.Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
From: Danny Milosavljevic <dannym <at> friendly-machines.com> To: guix-patches <at> gnu.org Cc: Danny Milosavljevic <dannym <at> friendly-machines.com> Subject: [WIP] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Date: Fri, 25 Apr 2025 01:03:17 +0200
* gnu/services/base.scm (%root-file-system-shepherd-service): In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Change-Id: If244a1594281057ee5b6163e23bcf11fab3968ff --- gnu/services/base.scm | 381 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 367 insertions(+), 14 deletions(-) diff --git a/gnu/services/base.scm b/gnu/services/base.scm index 8c6563c99d..de24d07b4e 100644 --- a/gnu/services/base.scm +++ b/gnu/services/base.scm @@ -348,10 +348,337 @@ (define %root-file-system-shepherd-service (provision '(root-file-system)) (start #~(const #t)) (stop #~(lambda _ - ;; Return #f if successfully stopped. + ;;; Return #f if successfully stopped. + + ;;; Beginning of inlined module (fuser) + + (use-modules (ice-9 textual-ports) + (ice-9 control) + (ice-9 string-fun) + (ice-9 match) + (ice-9 ftw) ; scandir + (srfi srfi-1) ; filter, for-each, find. + (srfi srfi-26) ; cut + (ice-9 exceptions)) ; guard + + (define PROC-DIR-NAME "/proc") + (define DEFAULT-SILENT-ERRORS + (list ENOENT ESRCH)) + + (define* (call-with-safe-syscall thunk + #:key + (on-error #f) + (silent-errors DEFAULT-SILENT-ERRORS) + (error-message-format #f) + (error-context '())) + "Call THUNK, handling system errors: +- If ERROR-MESSAGE-FORMAT and the error is not in SILENT-ERRORS, calls format +with ERROR-MESSAGE-FORMAT and ERROR-CONTEXT and (strerror errno) as arguments. +- Return ON-ERROR on error." + (catch 'system-error + thunk + (lambda args + (let ((errno (system-error-errno args))) + (unless (member errno silent-errors) + (when error-message-format + (apply format + (current-error-port) + error-message-format + (append + error-context + (list (strerror errno)))))) + on-error)))) + + (define (safe-stat path) + "Get stat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (stat path)) + #:error-message-format "Error: Cannot stat ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error #f)) + + (define (safe-umount path) + "Umount PATH--if possible.." + (call-with-safe-syscall (lambda () (umount path)) + #:error-message-format "Error: Cannot umount ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error 'error)) + + (define (safe-lstat path) + "Get lstat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (lstat path)) + #:error-message-format "Error: Cannot lstat ~s: ~a~%" + #:error-context (list path) + #:on-error #f)) + + (define (safe-scandir path) + "scandir PATH--or #f if not possible." + (let ((result (scandir path))) + (if result + result + (begin + (format (current-error-port) "Error: Cannot scandir ~s: ?~%" path) + '())))) + +;;; Processes + + (define (safe-get-fd-flags pid fd) + "Get flags for FD in PID--or #f if not possible." + (let ((fdinfo-path (format #f "~a/~a/fdinfo/~a" PROC-DIR-NAME pid fd))) + (call-with-safe-syscall (lambda () + (call-with-input-file fdinfo-path + (lambda (port) + ;; Find 'flags:' line and parse octal value + (let loop () + (let ((line (get-line port))) + (cond ((eof-object? line) #f) + ((string-prefix? "flags:\t" line) + (match (string-split line #\tab) + ((_ flags-str . _) + (catch 'invalid-argument + (lambda () + (string->number flags-str 8)) + (lambda args + #f))) + (_ #f))) + (else (loop)))))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list fdinfo-path) + #:on-error #f))) + + (define (safe-get-processes) + "Get a list of all PIDs from proc--or #f if not possible." + (let ((proc-dir PROC-DIR-NAME)) + (catch 'system-error + (lambda () + ;; Keep only numbers. + (filter-map string->number (safe-scandir proc-dir))) + ;; FIXME is errno even useful? + (lambda scan-err + (format (current-error-port) "Error scanning ~s: ~a~%" + proc-dir (strerror (system-error-errno scan-err))) + '())))) + + (define (safe-fd-on-device? pid fd target-device) + "Return whether fd FD on pid PID is on device TARGET-DEVICE." + (let* ((fd-path (format #f "~a/~a/fd/~a" PROC-DIR-NAME pid fd)) + (link-stat (safe-lstat fd-path))) + (and link-stat (eqv? (stat:dev link-stat) + target-device)))) + + (define (safe-get-process-fds pid) + "Get a list of all FDs of PID from proc--or #f if not possible." + (let ((fd-dir (format #f "~a/~a/fd" PROC-DIR-NAME pid))) + ;; Keep only numbers. + + + (filter-map string->number (safe-scandir fd-dir)))) + + (define (filter-process-fd-flags pid fds predicate) + "Get FLAGS from proc for PID and call PREDICATE with (FD FLAGS) each." + (filter (lambda (fd) + (predicate fd (safe-get-fd-flags pid fd))) + fds)) + + (define (safe-get-process-command pid) + "Return command of process PID--or #f if not possible." + (let ((cmdline-path (format #f "~a/~a/cmdline" PROC-DIR-NAME pid))) + (call-with-safe-syscall (lambda () + (call-with-input-file cmdline-path + (lambda (port) + (let ((full-cmdline (get-string-all port))) + (match (string-split full-cmdline #\nul) + ((command-name . _) command-name)))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list cmdline-path) + #:on-error #f))) + + (define (safe-kill-process pid kill-signal) + "Kill process PID with KILL-SIGNAL if possible." + (call-with-safe-syscall (lambda () + (kill pid kill-signal) + #t) + #:on-error 'error + #:silent-errors '() + #:error-message-format + "Error: Failed to kill process ~a: ~a~%" + #:error-context '())) + +;;; Mounts + + (define (safe-get-device mount-point) + "Get the device ID (st_dev) of MOUNT-POINT--or #f if not possible." + (and=> + (safe-stat mount-point) + stat:dev)) + + (define (safe-parse-mountinfo path) + "Read and parse /proc/self/mountinfo (or specified path). +Return a list of parsed entries, where each entry is: +(list mount-id parent-id mount-point-string) +Return '() on file read error or if file is unparseable." + (call-with-safe-syscall ; TODO: call-with-input-file is not actually a syscall. + (lambda () + (let ((entries '())) + (call-with-input-file path + (lambda (port) + (let loop () + (let ((line (get-line port))) + (unless (eof-object? line) + (match (string-split line #\space) + ;; mnt_id par_id major:minor root mount_point ... + ((m-id-str p-id-str _ _ mp . _) + ;; Attempt to parse IDs, skip line on error + (catch 'invalid-argument + (lambda () + (let ((mount-id (string->number m-id-str)) + (parent-id (string->number p-id-str))) + ;; Add successfully parsed entry to list + (set! entries (cons (list mount-id parent-id mp) + entries)) + (loop))) ; Continue to next line + (lambda args + (format (current-error-port) + "Warning: Skipping mountinfo line due to parse error: ~s (~a)~%" + line args) + (loop)))) + (_ (loop)))))))) + ;; Return parsed entries in file order + (reverse entries))) + #:error-message-format "Error: Cannot read or parse mountinfo file ~s: ~a" + #:error-context (list path) + #:on-error '())) + + (define (safe-find-nested-mounts root-mount-point target-device) + "Find mount points that block the unmounting of ROOT-MOUNT-POINT. +TARGET-DEVICE argument is ignored. +Mountpoints are returned depth-first (in the order they can be unmounted). +ROOT-MOUNT-POINT is included." + (let* ((mountinfo (safe-parse-mountinfo (format #f "~a/self/mountinfo" PROC-DIR-NAME)))) + (define (safe-find-mounts-via-mountinfo accumulator lives root-mount-point) + (if (member root-mount-point accumulator) + (format (current-error-port) "Cycle detected~%")) + (let ((accumulator (cons root-mount-point accumulator))) + (if (= lives 0) + (begin + (format (current-error-port) "Error: Recursive mountpoints too deep.~%") + accumulator) + (let ((root-entry (find (lambda (entry) + (match entry + ((_ _ mp) (string=? mp root-mount-point)) + (_ #f))) ; Should not happen + mountinfo))) + (if root-entry + (let ((root-mount-id (car root-entry))) + (fold (lambda (entry accumulator) + (match entry + ((_ parent-id mp) + (if (= parent-id root-mount-id) + (safe-find-mounts-via-mountinfo accumulator + (- lives 1) + mp) + accumulator)) + (_ accumulator))) + accumulator + mountinfo)) + (begin + (format (current-error-port) "Error: Could not find mount ID for ~s in parsed mountinfo~%" + root-mount-point) + accumulator)))))) + (safe-find-mounts-via-mountinfo '() 100 root-mount-point))) + + ;;; End of inlined module (fuser) + + (define MOUNT-POINT "/") + + (define O_ACCMODE #o0003) + + (define (flags-has-write-access? flags) + "Given open FLAGS, return whether it (probably) signifies write access." + (and flags (not (= (logand flags O_ACCMODE) + O_RDONLY)))) + + (define (ask-to-kill? pid command) + "Ask whether to kill process with id PID (and command COMMAND)" + (format (current-error-port) "~%Process Found: PID ~a Command: ~s~%" pid command) + (format (current-error-port) "Kill process ~a? [y/N] " pid) + (force-output (current-error-port)) + (let ((response (read-char (current-input-port)))) + (if (not (eof-object? response)) + ;; Consume rest of line. + (read-line (current-input-port))) + (or (eqv? response #\y) + (eqv? response #\Y)))) + + (define (clean-up . args) + (let* ((error-port (current-error-port)) + (root-device (safe-get-device MOUNT-POINT)) + (mounts (safe-find-nested-mounts MOUNT-POINT root-device)) + (mount-devices (map safe-get-device mounts))) + (format error-port "Searching for processes writing to files on devices ~s (mount points ~s)...~%" + mount-devices mounts) + (let* ((our-pid (getpid)) + (pids (filter (lambda (pid) + (not (= pid our-pid))) + (safe-get-processes))) + (pids (filter (lambda (pid) + (match (filter-process-fd-flags pid + (safe-get-process-fds pid) + (lambda (fd flags) + (and (flags-has-write-access? flags) + (find (lambda (target-device) + (safe-fd-on-device? pid fd target-device)) + mount-devices)))) + ((x . _) #t) + (_ #f))) + pids))) + (format error-port "Found ~a process(es) matching the criteria.~%" (length pids)) + (for-each (lambda (pid) + (let ((command (safe-get-process-command pid))) + (if (ask-to-kill? pid command) + (safe-kill-process pid SIGKILL) + (format error-port "Skipping PID ~a (~s).~%" pid command)))) + pids)) + (format error-port "~%Process scan complete.~%") + (format error-port "Searching for nested mounts of ~s...~%" MOUNT-POINT) + (if (null? mounts) + (format error-port "No nested mount points found.~%") + (begin + (format error-port "Found nested mount points that would need unmounting:~%") + (for-each (lambda (mp) + (format #t " ~s~%" mp) + (safe-umount mp)) + mounts))))) + + (define (call-with-mounted-filesystem source mountpoint filesystem-type options proc) + (mount source mountpoint file-system-type options #:update-mtab? #f) + (catch #t + (lambda () + (proc) + (umount mountpoint)) + (lambda args + (umount mountpoint)))) + (sync) - (let ((null (%make-void-port "w"))) + (let* ((null (%make-void-port "w")) + (call-with-io-file (lambda (file-name proc) + (let ((port (open file-name O_RDWR))) + (set-current-input-port port) + (set-current-output-port port) + (set-current-error-port port) + (catch #t proc + (lambda args + (set-current-input-port null) + (set-current-output-port null) + (set-current-error-port null) + (close port)))))) + (with-mounted-filesystem (syntax-rules () + ((with-mounted-filesystem source filesystem-type mountpoint options . exps) + (call-with-mounted-filesystem source filesystem-type mountpoint options + (lambda () . exps)))))) + ;; Redirect the default output ports. (set-current-output-port null) (set-current-error-port null) @@ -363,21 +690,47 @@ (define %root-file-system-shepherd-service ;; root file system can be re-mounted read-only. (let loop ((n 10)) (unless (catch 'system-error + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + #t) + (const #f)) + (when (zero? n) + ;; TODO: pivot-root to /run/booted-system/initrd first--so we don't try to kill ourselves. + ;; But that's on /gnu/store--which we don't have anymore. + ;; Instead, we'll just exempt outselves (see "our-pid")--and possibly miss things. + (with-mounted-filesystem "none" "proc" "/proc" 0 + (with-mounted-filesystem "none" "devtmpfs" "/dev" 0 + (catch 'system-error (lambda () - (mount #f "/" #f - (logior MS_REMOUNT MS_RDONLY) - #:update-mtab? #f) - #t) + (mknod "/dev/tty" 'char-special #o600 (+ (* 5 256) 0))) (const #f)) - (unless (zero? n) - ;; Yield to the other fibers. That gives logging fibers - ;; an opportunity to close log files so the 'mount' call - ;; doesn't fail with EBUSY. - ((@ (fibers) sleep) 1) - (loop (- n 1))))) + (call-with-io-file "/dev/tty" + (lambda () + ;; we don't have chvt :( + ;; (it would need to use %ioctl fd VT_ACTIVATE int on /dev/tty) + ;(chvt 12) + (clean-up))))) + ;; Should have been unmounted already--but we are paranoid + ;; (and probably were blocking ourselves anyway). + (catch 'system-error + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + #t) + (const #f)) + ((@ (fibers) sleep) 10)) + (unless (zero? n) + ;; Yield to the other fibers. That gives logging fibers + ;; an opportunity to close log files so the 'mount' call + ;; doesn't fail with EBUSY. + ((@ (fibers) sleep) 1) + (loop (- n 1))))) - #f))) - (respawn? #f))) + #f))) + (respawn? #f))) (define root-file-system-service-type (shepherd-service-type 'root-file-system base-commit: 85b5c2c8f66aed05730f6c7bdeabfaadf619bb8f prerequisite-patch-id: 1a4781dff5873451484bba21bc0dc4617075cb55 prerequisite-patch-id: bbe7274727aa8e1bf4beee1acafbd0a3fdc9257a -- 2.49.0
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.