Public/Processes/Stop-VmProcessesUsingPath.ps1

<#
.SYNOPSIS
    Terminates every process on a Hyper-V VM whose open files, cwd,
    executable, or memory mappings touch a given filesystem path,
    escalating from SIGTERM to SIGKILL, and reports the outcome.

.DESCRIPTION
    Single-round-trip primitive used by uninstall flows that need to
    free a directory tree before removing it. The cmdlet scans the VM
    for processes that hold <Path> open, sends SIGTERM, then polls
    liveness at 0.5s intervals up to <GraceSeconds>. Any SIGTERM
    survivors are sent SIGKILL and polled for up to 5 seconds (the
    kernel reap window); the results are split into TerminatedPids
    (exited under SIGTERM), KilledPids (reaped after SIGKILL), and
    StillAlive (unreaped even after SIGKILL - typically uninterruptible
    sleep, e.g. blocked in disk I/O or an NFS RPC). A non-empty
    StillAlive causes the cmdlet to throw (exit 64 on the remote side).

    Liveness is `kill -0` plus a look at the `State:` field of
    /proc/<pid>/status: `kill -0` also succeeds for a zombie, but a
    zombie has already exited and released every file, cwd, and
    mapping, so it is counted as exited rather than as a survivor.

    The remote scanner prefers `lsof +D` (catches open files, mmaps,
    cwd, exe). Without lsof it inspects /proc/<pid>/exe, cwd, fd/*, and
    maps directly. When `fuser` is available, `fuser -m` is used only
    to narrow the candidate list: it reports every process using the
    *filesystem* that contains <Path>, which on a root-filesystem path
    is nearly every process on the VM, so each candidate is confirmed
    against /proc before it is signalled. Without fuser, every
    /proc/<pid> entry is a candidate. The /proc check needs nothing
    beyond coreutils, findutils, grep, and awk.

    Path is validated host-side before any SSH call: must be a
    non-empty absolute path with no `..` segments, no NUL byte, and no
    single quote (the remote script embeds it inside a single-quoted
    bash assignment, matching the rest of the module).

.PARAMETER SshClient
    A live Renci.SshNet.SshClient. The caller owns the client's
    lifecycle - this function neither connects nor disposes it.

.PARAMETER Path
    Absolute path on the VM whose holders should be terminated. May be
    a file or a directory; the scanner treats it as a tree root.

.PARAMETER GraceSeconds
    Non-negative integer. The cmdlet polls the SIGTERM'd PIDs at 0.5s
    intervals for at most this many seconds. GraceSeconds = 0 skips
    the poll loop entirely - any PID that has not already exited by
    the time the kill returns is escalated to SIGKILL immediately.

.OUTPUTS
    PSCustomObject with three integer-array properties:
      - TerminatedPids: PIDs that exited within the grace window.
      - KilledPids    : PIDs that survived SIGTERM but were reaped
                        within the 5-second SIGKILL window.
      - StillAlive    : PIDs that survived SIGKILL too (typically
                        stuck in uninterruptible sleep). Non-empty
                        StillAlive causes the cmdlet to throw.

.EXAMPLE
    Stop-VmProcessesUsingPath -SshClient $ssh -Path '/opt/jdk-21' -GraceSeconds 5

.NOTES
    On-VM commands run under sudo. The caller is responsible for
    ensuring the SSH user has password-less sudo.
#>

function Stop-VmProcessesUsingPath {
    [CmdletBinding()]
    [OutputType([PSCustomObject])]
    param(
        [Parameter(Mandatory)]
        [object] $SshClient,

        [Parameter(Mandatory)]
        [string] $Path,

        [Parameter(Mandatory)]
        [int] $GraceSeconds
    )

    # Host-side validation. Mirrors Remove-VmDirectory's path rules so
    # an entire install/uninstall flow that pre-validates against one
    # cmdlet's contract will not be surprised by another.
    if ([string]::IsNullOrEmpty($Path)) {
        throw "Stop-VmProcessesUsingPath: -Path must be a non-empty string."
    }
    if (-not $Path.StartsWith('/')) {
        throw ("Stop-VmProcessesUsingPath: -Path '$Path' must be an " +
            "absolute path (start with '/').")
    }
    if ($Path.Contains([char]0)) {
        throw "Stop-VmProcessesUsingPath: -Path contains a NUL byte."
    }
    if ($Path.Contains("'")) {
        throw ("Stop-VmProcessesUsingPath: -Path '$Path' contains a " +
            "single quote, which is not allowed.")
    }
    if ($Path.Split('/') -contains '..') {
        throw ("Stop-VmProcessesUsingPath: -Path '$Path' contains a " +
            "'..' segment.")
    }
    if ($GraceSeconds -lt 0) {
        throw ("Stop-VmProcessesUsingPath: -GraceSeconds must be " +
            "non-negative (got $GraceSeconds).")
    }

    $vmHost = if ($SshClient.PSObject.Properties['ConnectionInfo'] -and $SshClient.ConnectionInfo) {
        $SshClient.ConnectionInfo.Host
    } else { '(unknown)' }

    # EX_USAGE-adjacent exit code used module-wide for "remote side
    # ran fine but the operation could not complete fully" - here, one
    # or more processes survived SIGKILL.
    $survivorExitCode = 64

    # Poll loop is generated only when grace > 0. Plan calls this out
    # explicitly: GraceSeconds=0 skips waiting entirely. We still
    # classify after the kill so terminated-immediately is recorded.
    # The loop calls is_alive, which the main script defines before
    # the point where this fragment is spliced in.
    $pollLoop = if ($GraceSeconds -gt 0) {
        # Tenths of a second; widened to long so a very large grace
        # value stays an integer literal in the generated bash.
        $totalTenths = [long]$GraceSeconds * 10
        @"

elapsed=0
while [ "`$elapsed" -lt $totalTenths ]; do
    any_alive=0
    for pid in `$initial; do
        if is_alive "`$pid"; then
            any_alive=1
            break
        fi
    done
    if [ "`$any_alive" -eq 0 ]; then break; fi
    sleep 0.5
    elapsed=`$((elapsed + 5))
done
"@

    } else { '' }

    # SIGKILL reap window is fixed at 5s (10 polls * 0.5s). The kernel
    # reaps a SIGKILL'd process within microseconds unless the task is
    # stuck in uninterruptible sleep (D state) waiting on disk I/O,
    # NFS, or similar - cases that 5s will not unblock either, so a
    # longer wait would just delay the inevitable StillAlive report.
    $reapTenths = 50

    # NOTE: this is an expandable here-string. Every bash dollar sign
    # is backtick-escaped, and bash comments inside it deliberately
    # avoid dollar signs and backticks so PowerShell never rewrites them.
    $script = @"
set -euo pipefail
path='$Path'
pids=''

# Succeeds when /proc/<pid> shows the process using the target path, or
# anything beneath it, through exe, cwd, an open descriptor, or a mapping.
holds_path() {
    local d="/proc/`$1" t link
    [ -d "`$d" ] || return 1
    for link in exe cwd; do
        t=`$(sudo readlink "`$d/`$link" 2>/dev/null || true)
        case "`$t" in
            "`$path"|"`$path"/*) return 0 ;;
        esac
    done
    # find exits non-zero when a descriptor closes mid-scan; that must not
    # fail the pipeline, so only awk decides the outcome.
    if { sudo find "`$d/fd" -mindepth 1 -maxdepth 1 -printf '%l\n' 2>/dev/null || true; } |
        awk -v p="`$path" '`$0 == p || index(`$0, p"/") == 1 { f=1 } END { exit !f }'; then
        return 0
    fi
    if [ -r "`$d/maps" ]; then
        if sudo awk -v p="`$path" '`$NF == p || index(`$NF, p"/") == 1 { f=1; exit } END { exit !f }' "`$d/maps" 2>/dev/null; then
            return 0
        fi
    fi
    return 1
}

# Succeeds while <pid> is a live, non-zombie process. kill -0 alone also
# succeeds for a zombie, which has already released everything it held.
# An unreadable state is treated as alive (the conservative answer).
is_alive() {
    local state
    sudo kill -0 "`$1" 2>/dev/null || return 1
    state=`$(sudo awk '/^State:/ { print `$2; exit }' "/proc/`$1/status" 2>/dev/null || true)
    [ "`$state" != 'Z' ]
}

if command -v lsof >/dev/null 2>&1; then
    pids=`$(sudo lsof -t +D "`$path" 2>/dev/null | sort -un | tr '\n' ' ' || true)
else
    if command -v fuser >/dev/null 2>&1; then
        # fuser -m reports every user of the whole filesystem that contains
        # the path, so its output is only a candidate list; each PID is
        # confirmed by holds_path below before it can be signalled.
        candidates=`$(sudo fuser -m "`$path" 2>/dev/null | tr -s ' \t\n' '\n' | grep -v '^`$' | sort -un | tr '\n' ' ' || true)
    else
        candidates=''
        for d in /proc/[0-9]*; do
            [ -d "`$d" ] || continue
            candidates="`$candidates `${d#/proc/}"
        done
    fi
    found=''
    for pid in `$candidates; do
        if holds_path "`$pid"; then found="`$found `$pid"; fi
    done
    pids=`$(printf '%s\n' `$found | grep -v '^`$' | sort -un | tr '\n' ' ' || true)
fi

pids=`$(echo `$pids | xargs || true)

if [ -z "`$pids" ]; then
    echo "TERMINATED= KILLED= STILL_ALIVE="
    exit 0
fi

initial="`$pids"
echo "`$pids" | xargs -r sudo kill -TERM 2>/dev/null || true
$pollLoop
terminated=''
sigterm_survivors=''
for pid in `$initial; do
    if is_alive "`$pid"; then
        sigterm_survivors="`$sigterm_survivors `$pid"
    else
        terminated="`$terminated `$pid"
    fi
done

# SIGKILL escalation. Only survivors are signalled. Signalling a PID that
# has already exited is harmless, but kill then returns non-zero, which
# would abort the script under errexit without the trailing true.
killed=''
still_alive=''
if [ -n "`$sigterm_survivors" ]; then
    echo "`$sigterm_survivors" | xargs -r sudo kill -KILL 2>/dev/null || true

    kelapsed=0
    while [ "`$kelapsed" -lt $reapTenths ]; do
        any_alive=0
        for pid in `$sigterm_survivors; do
            if is_alive "`$pid"; then
                any_alive=1
                break
            fi
        done
        if [ "`$any_alive" -eq 0 ]; then break; fi
        sleep 0.5
        kelapsed=`$((kelapsed + 5))
    done

    for pid in `$sigterm_survivors; do
        if is_alive "`$pid"; then
            still_alive="`$still_alive `$pid"
        else
            killed="`$killed `$pid"
        fi
    done
fi

terminated=`$(echo `$terminated | xargs || true)
killed=`$(echo `$killed | xargs || true)
still_alive=`$(echo `$still_alive | xargs || true)

echo "TERMINATED=`$terminated KILLED=`$killed STILL_ALIVE=`$still_alive"

if [ -n "`$still_alive" ]; then
    exit $survivorExitCode
fi
exit 0
"@


    # Windows PowerShell here-strings use CRLF; remote bash interprets
    # the trailing \r as part of the token. Normalise to LF, same as
    # the rest of the module.
    $script = $script -replace "`r`n", "`n"

    $result = Invoke-SshClientCommand -SshClient $SshClient -Command $script

    # Locate the result line in stdout. The script always prints
    # exactly one TERMINATED= line; missing it means the remote bash
    # crashed before reaching the report - surface stderr in that case.
    $resultLine = $null
    if ($result.Output) {
        foreach ($line in ($result.Output -split "`n")) {
            if ($line -match '^TERMINATED=') { $resultLine = $line; break }
        }
    }

    if ($null -eq $resultLine) {
        throw ("Stop-VmProcessesUsingPath failed (vm: $vmHost, path: $Path, " +
            "exit $($result.ExitStatus)): no result line in stdout. " +
            "stdout: $($result.Output) stderr: $($result.Error)")
    }

    # Parse "TERMINATED=<a b c> KILLED=<...> STILL_ALIVE=<...>". Each
    # capture is lazy so the engine can find the literal field
    # separators even when a list contains spaces.
    if ($resultLine -notmatch '^TERMINATED=(?<t>.*?)\s+KILLED=(?<k>.*?)\s+STILL_ALIVE=(?<s>.*?)\s*$') {
        throw ("Stop-VmProcessesUsingPath failed (vm: $vmHost, path: $Path): " +
            "unparseable result line '$resultLine'.")
    }

    # Each capture is a space-separated PID list (possibly empty);
    # @() keeps a single PID as a one-element array.
    $terminatedPids = @()
    $killedPids     = @()
    $stillAlive     = @()
    if ($Matches['t'].Trim()) {
        $terminatedPids = @($Matches['t'].Trim() -split '\s+' | ForEach-Object { [int]$_ })
    }
    if ($Matches['k'].Trim()) {
        $killedPids = @($Matches['k'].Trim() -split '\s+' | ForEach-Object { [int]$_ })
    }
    if ($Matches['s'].Trim()) {
        $stillAlive = @($Matches['s'].Trim() -split '\s+' | ForEach-Object { [int]$_ })
    }

    $resultObject = [PSCustomObject]@{
        TerminatedPids = $terminatedPids
        KilledPids     = $killedPids
        StillAlive     = $stillAlive
    }

    # Survivors of SIGKILL are a hard failure: the caller cannot safely
    # remove the tree while something still holds it.
    if ($result.ExitStatus -eq $survivorExitCode -or $stillAlive.Count -gt 0) {
        throw ("Stop-VmProcessesUsingPath: $($stillAlive.Count) process(es) " +
            "still hold '$Path' on VM $vmHost after SIGTERM + " +
            "$GraceSeconds`s grace + SIGKILL + 5s reap. " +
            "StillAlive: $($stillAlive -join ', '). " +
            "Killed: $($killedPids -join ', '). " +
            "Terminated: $($terminatedPids -join ', ').")
    }

    # Any other non-zero exit means the remote script itself failed.
    if ($result.ExitStatus -ne 0) {
        throw ("Stop-VmProcessesUsingPath failed (vm: $vmHost, path: $Path, " +
            "exit $($result.ExitStatus)). stdout: $($result.Output) " +
            "stderr: $($result.Error)")
    }

    return $resultObject
}