# Public/Start-BykaCCSupervisor.ps1

function Start-BykaCCSupervisor {
<#
.SYNOPSIS
Claude Code Telegram-bridge supervisor.

.DESCRIPTION
Runs the long-running `claude --channels` process with restart-on-exit,
per-instance state-dir separation, repo-switcher apply-watcher integration,
sleep prevention, and crash-loop backoff. Designed to be launched at user
login via a minimized PowerShell 7 window (see New-BykaCCSupervisorLauncher).

Restart policy: a session that ran for less than 30 seconds counts as a
"quick failure". Quick failures back off exponentially
(RestartDelay * 2^(n-1), capped at 300 seconds) and the supervisor stops
after 10 consecutive quick failures. Any session that ran 30 seconds or
longer resets the counter and restarts after RestartDelay seconds.

On every exit path after setup has begun (normal stop, exception, Ctrl+C)
the supervisor stops the apply-watcher, removes supervisor.pid and
supervisor.cwd, and clears the keep-awake execution state.

CRITICAL: Claude CLI requires a REAL interactive terminal.
  - Do NOT pipe stdout/stderr (breaks TTY detection)
  - Do NOT use -WindowStyle Hidden (no terminal allocated)
  - ONLY use -WindowStyle Minimized (real terminal, just minimized)

.PARAMETER FallbackProjectDir
Absolute path to the project directory used when active-project.txt is
missing/invalid. Must be a real directory under -BaseDir. Required.

.PARAMETER BaseDir
The trusted base directory under which all project dirs must live. Defaults
to the parent of -FallbackProjectDir (e.g. C:\Dev when FallbackProjectDir
is C:\Dev\byka). Used to validate active-project.txt entries against the
Dev-root prefix.

.PARAMETER StateDir
Per-instance state directory. Defaults to
"$env:USERPROFILE\.claude\channels\telegram-cli".

.PARAMETER TelegramPlugin
Claude Code Telegram plugin identifier. Defaults to
'plugin:telegram@claude-plugins-official'.

.PARAMETER RepoSwitcherRoot
Where the @kagdaci/repo-switcher npm package is installed (used to locate
ps/apply-watcher.ps1). Defaults to
"$env:APPDATA\npm\node_modules\@kagdaci\repo-switcher".

.PARAMETER RepoNameAllowlist
Regex used to validate the leaf name of active-project.txt entries. Defaults
to '^[A-Za-z0-9_\-\.]{1,64}$'.

.PARAMETER RestartDelay
Seconds to wait before restarting after a session that ran 30 seconds or
longer; also the base of the exponential backoff applied to quick failures.
Range 1-3600. Default 5.

.PARAMETER PwshPath
Path to the PowerShell executable handed to Start-ApplyWatcher. Defaults to
the executable path of the current process.

.EXAMPLE
Start-BykaCCSupervisor -FallbackProjectDir 'C:\Dev\byka'

.EXAMPLE
Start-BykaCCSupervisor -FallbackProjectDir 'D:\Code\primary' -BaseDir 'D:\Code'
#>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [ValidateNotNullOrEmpty()]
        [ValidateScript({ Test-Path $_ -PathType Container }, ErrorMessage = "FallbackProjectDir must exist as a directory: '{0}'")]
        [string]$FallbackProjectDir,

        [ValidateNotNullOrEmpty()]
        [string]$BaseDir = $(Split-Path -Parent $FallbackProjectDir),

        [ValidateNotNullOrEmpty()]
        [string]$StateDir = "$env:USERPROFILE\.claude\channels\telegram-cli",

        [ValidateNotNullOrEmpty()]
        [string]$TelegramPlugin = 'plugin:telegram@claude-plugins-official',

        [ValidateNotNullOrEmpty()]
        [string]$RepoSwitcherRoot = "$env:APPDATA\npm\node_modules\@kagdaci\repo-switcher",

        [ValidateNotNullOrEmpty()]
        [string]$RepoNameAllowlist = '^[A-Za-z0-9_\-\.]{1,64}$',

        [ValidateRange(1, 3600)]
        [int]$RestartDelay = 5,

        [ValidateNotNullOrEmpty()]
        [string]$PwshPath = (Get-Process -Id $PID).Path
    )

    # [G4] security F2: $BaseDir may have been derived from a -FallbackProjectDir
    # that's a root path (e.g. 'C:\') -- Split-Path -Parent returns ''. Reject
    # explicitly so the prefix regex doesn't collapse to '^\\[^\\]+'.
    if ([string]::IsNullOrWhiteSpace($BaseDir)) {
        throw "BaseDir is empty -- FallbackProjectDir '$FallbackProjectDir' may be a root path. Pass -BaseDir explicitly."
    }

    # [G4] security F6 / qa F1: reset the dedup-warn state on every supervisor
    # entry so repeated invocations in the same PS session start clean.
    $script:LastResolveWarn = ''

    # [G4] qa F5: $Host.UI.RawUI is $null in non-interactive hosts (CI,
    # pwsh -NonInteractive). Best-effort -- the title is a UX nicety, not
    # load-bearing for supervisor function.
    try { $Host.UI.RawUI.WindowTitle = "Claude Code Channels - CLI bot (Bot Y)" } catch {}

    # Per-instance state dir -- CLI uses Bot Y here (TELEGRAM_TASKS_BOT_TOKEN).
    $env:TELEGRAM_STATE_DIR = $StateDir

    $LogFile            = Join-Path $StateDir 'channels.log'
    $ActiveProjectFile  = "$env:USERPROFILE\.claude\channels\active-project.txt"
    $SupervisorPidFile  = "$env:USERPROFILE\.claude\channels\supervisor.pid"
    $SupervisorCwdFile  = "$env:USERPROFILE\.claude\channels\supervisor.cwd"
    $WatcherPidFile     = "$env:USERPROFILE\.claude\channels\watcher.pid"
    $ApplyWatcherScript = Join-Path $RepoSwitcherRoot 'ps\apply-watcher.ps1'

    # Ensure state dir exists.
    if (-not (Test-Path $StateDir)) {
        New-Item -ItemType Directory -Path $StateDir -Force | Out-Null
    }

    # Log rotation -- keep current log under 5MB.
    if (Test-Path $LogFile) {
        $sz = (Get-Item $LogFile).Length / 1MB
        if ($sz -gt 5) {
            Move-Item $LogFile "$LogFile.old" -Force
        }
    }

    Write-LogLine -Message "========== Service starting (CLI bot via STATE_DIR=$StateDir, BaseDir=$BaseDir, Fallback=$FallbackProjectDir) ==========" -Level 'START' -LogFile $LogFile

    # Kill ONLY our prior CLI bun, not VS Code's.
    Stop-OurStaleBun -StateDir $StateDir -LogFile $LogFile

    # Check for existing channels process (filtered to non-VS-Code).
    # This return (and the one below) happens BEFORE supervisor.pid is written,
    # so a second instance never deletes the first instance's PID/cwd files.
    $existing = Get-ChannelsProcess
    if ($existing) {
        Write-LogLine -Message "Already running (PID $($existing.ProcessId)). Exiting." -Level 'WARN' -LogFile $LogFile
        return
    }

    # Add Bun to PATH -- only when it is not already there, so repeated
    # invocations in the same session do not keep growing $env:PATH.
    $bunBin = "$env:USERPROFILE\.bun\bin"
    if (($env:PATH -split ';') -notcontains $bunBin) {
        $env:PATH += ";$bunBin"
    }

    # Check claude on PATH.
    if (-not (Get-Command claude -ErrorAction SilentlyContinue)) {
        Write-LogLine -Message "claude not in PATH -- exiting in 10s (no manual Enter required)" -Level 'ERROR' -LogFile $LogFile
        Start-Sleep -Seconds 10
        return
    }

    # Retry-loop counters: consecutive quick failures and session ordinal.
    $fails = 0
    $session = 0

    # Everything from the PID-file write onward is inside try/finally so that a
    # terminating error during setup (not only inside the loop) still stops the
    # watcher and removes the PID/cwd files.
    try {
        Write-FileAtomic -Path $SupervisorPidFile -Content $PID
        Write-LogLine -Message "Wrote supervisor.pid=$PID" -Level 'INFO' -LogFile $LogFile

        Start-ApplyWatcher -ApplyWatcherScript $ApplyWatcherScript -WatcherPidFile $WatcherPidFile -LogFile $LogFile -PwshPath $PwshPath -RepoSwitcherRoot $RepoSwitcherRoot

        # Initial project dir (re-resolved on every supervisor iteration).
        $ProjectDir = Resolve-ProjectDir `
            -File $ActiveProjectFile `
            -Fallback $FallbackProjectDir `
            -BaseDir $BaseDir `
            -Allowlist $RepoNameAllowlist `
            -LogFile $LogFile
        Set-Location $ProjectDir
        Write-LogLine -Message "Working dir: $ProjectDir (resolved from active-project.txt; fallback=$FallbackProjectDir)" -Level 'INFO' -LogFile $LogFile

        # Load long-lived Anthropic OAuth token from the neutral channels/ location.
        # NOT a Telegram token -- it's Anthropic API auth shared across surfaces.
        $TokenFile = "$env:USERPROFILE\.claude\channels\.token"
        if (Test-Path $TokenFile) {
            # Get-Content -Raw can yield $null for an empty file; the [string]
            # cast turns that into '' so .Trim() cannot throw.
            $token = ([string](Get-Content $TokenFile -Raw)).Trim()
            if ($token) {
                $env:ANTHROPIC_API_KEY = $token
                Write-LogLine -Message "Loaded long-lived Anthropic token from .token file" -Level 'INFO' -LogFile $LogFile
            } else {
                Write-LogLine -Message ".token file is empty -- falling back to OAuth (may expire)" -Level 'WARN' -LogFile $LogFile
            }
        } else {
            Write-LogLine -Message "No .token file found -- falling back to OAuth (may expire)" -Level 'WARN' -LogFile $LogFile
        }

        # Prevent sleep (non-fatal if it fails).
        # [G4] qa F2: Add-Type with a fixed -Namespace/-Name throws "type exists"
        # on re-import within the same process. Guard with type-presence check.
        # 2147483649 = 0x80000001 = ES_CONTINUOUS | ES_SYSTEM_REQUIRED.
        try {
            if (-not ([System.Management.Automation.PSTypeName]'Win32.KeepAwake').Type) {
                Add-Type -MemberDefinition '[DllImport("kernel32.dll")] public static extern uint SetThreadExecutionState(uint f);' -Namespace Win32 -Name KeepAwake -ErrorAction Stop
            }
            [Win32.KeepAwake]::SetThreadExecutionState([uint32]2147483649) | Out-Null
            Write-LogLine -Message "Sleep prevention enabled" -Level 'INFO' -LogFile $LogFile
        } catch {
            Write-LogLine -Message "Sleep prevention failed (non-fatal): $_" -Level 'WARN' -LogFile $LogFile
        }

        # Retry loop.
        while ($true) {
            $session++

            # Health-check watcher; respawn if missing/dead.
            if (-not (Test-WatcherAlive -WatcherPidFile $WatcherPidFile)) {
                Write-LogLine -Message "Watcher dead/missing -- respawning" -Level 'WARN' -LogFile $LogFile
                Start-ApplyWatcher -ApplyWatcherScript $ApplyWatcherScript -WatcherPidFile $WatcherPidFile -LogFile $LogFile -PwshPath $PwshPath -RepoSwitcherRoot $RepoSwitcherRoot
            }

            # Re-resolve project dir on every iteration so /switch-repo's file
            # write is picked up after the previous claude session exits.
            $NewProjectDir = Resolve-ProjectDir `
                -File $ActiveProjectFile `
                -Fallback $FallbackProjectDir `
                -BaseDir $BaseDir `
                -Allowlist $RepoNameAllowlist `
                -LogFile $LogFile
            if ($NewProjectDir -ne $ProjectDir) {
                Write-LogLine -Message "#$session Project dir changed: $ProjectDir -> $NewProjectDir (active-project.txt updated)" -Level 'INFO' -LogFile $LogFile
                # TOCTOU defense: between Resolve-ProjectDir validating + Set-Location
                # executing, the dir could vanish. Stay in the old cwd on failure.
                try {
                    Set-Location -Path $NewProjectDir -ErrorAction Stop
                    $ProjectDir = $NewProjectDir
                } catch {
                    Write-LogLine -Message "#$session Set-Location to '$NewProjectDir' FAILED: $($_.Exception.Message). Staying in $ProjectDir." -Level 'ERROR' -LogFile $LogFile
                }
            }

            # Snapshot launch cwd for apply-pending.js validation. Written AFTER
            # the re-resolve above so the file names the directory claude is
            # actually launched in for this session, not the previous one.
            Write-FileAtomic -Path $SupervisorCwdFile -Content $ProjectDir

            # Snapshot bun PIDs BEFORE launching claude -- anything new = ours.
            $preBun = @(Get-Process bun -ErrorAction SilentlyContinue | ForEach-Object { $_.Id })

            # Re-check duplicate (could have been launched by another window).
            $dup = Get-ChannelsProcess
            if ($dup) {
                Write-LogLine -Message "#$session Another channels process found (PID $($dup.ProcessId)). Waiting 60s." -Level 'WARN' -LogFile $LogFile
                Start-Sleep 60
                continue
            }

            $t0 = Get-Date
            Write-LogLine -Message "#$session Launching claude --channels (Bot Y via STATE_DIR=$StateDir, cwd=$ProjectDir)" -Level 'START' -LogFile $LogFile

            # Run claude directly -- NO piping, NO redirection.
            claude --channels $TelegramPlugin

            $code = $LASTEXITCODE

            # Bug 2 fix (2026-05-12, preserved from start-channels.ps1): drain
            # any buffered keystrokes left over from the prior claude --channels
            # invocation. Without this, the NEXT claude session can appear
            # "frozen" until the user presses Enter -- claude's raw-mode stdin
            # handling on exit leaves the console with buffered EOL/EOF events
            # that leak into the next invocation. Best-effort: silently no-ops
            # when stdin isn't a real console (script reload, redirect, etc.).
            try {
                while ([Console]::KeyAvailable) { [void][Console]::ReadKey($true) }
            } catch {
                # Non-interactive context -- safe to ignore.
            }

            $dur = (Get-Date) - $t0
            Write-LogLine -Message "#$session Ended (exit=$code, ran=$([int]$dur.TotalSeconds)s)" -Level 'WARN' -LogFile $LogFile

            # Kill ONLY bun processes that are new since pre-snapshot -- these are ours.
            # VS Code's bun (Bot X) is in $preBun and stays untouched.
            $ourBun = @(Get-Process bun -ErrorAction SilentlyContinue | Where-Object { $_.Id -notin $preBun })
            if ($ourBun.Count -gt 0) {
                $ourBun | Stop-Process -Force -ErrorAction SilentlyContinue
                Write-LogLine -Message "Killed $($ourBun.Count) new-since-launch bun process(es); VS Code's bun untouched" -Level 'INFO' -LogFile $LogFile
            }

            if ($dur.TotalSeconds -lt 30) {
                # Quick failure: count it, give up after 10 in a row, otherwise
                # back off exponentially (capped at 300s).
                $fails++
                if ($fails -ge 10) {
                    Write-LogLine -Message "10 quick failures. Stopping. Exiting in 10s (no manual Enter required)." -Level 'ERROR' -LogFile $LogFile
                    Start-Sleep -Seconds 10
                    # Keep-awake reset and file/watcher cleanup happen in finally.
                    return
                }
                $wait = [math]::Min($RestartDelay * [math]::Pow(2, $fails - 1), 300)
                Write-LogLine -Message "Quick fail #$fails. Backoff ${wait}s." -Level 'ERROR' -LogFile $LogFile
                Start-Sleep $wait
            } else {
                $fails = 0
                Write-LogLine -Message "Restarting in ${RestartDelay}s..." -Level 'INFO' -LogFile $LogFile
                Start-Sleep $RestartDelay
            }

            # Re-assert keep-awake before the next session.
            try { [Win32.KeepAwake]::SetThreadExecutionState([uint32]2147483649) | Out-Null } catch {}
        }
    } finally {
        # Clear the keep-awake request on EVERY exit path (Ctrl+C, exception,
        # quick-failure stop) so a host session that outlives this function is
        # not left blocking system sleep. 2147483648 = 0x80000000 = ES_CONTINUOUS.
        # The catch also covers the case where the Win32.KeepAwake type was
        # never loaded.
        try { [Win32.KeepAwake]::SetThreadExecutionState([uint32]2147483648) | Out-Null } catch {}
        Stop-ApplyWatcher -WatcherPidFile $WatcherPidFile -LogFile $LogFile
        Remove-Item -LiteralPath $SupervisorPidFile, $SupervisorCwdFile -ErrorAction SilentlyContinue
        Write-LogLine -Message "Supervisor exiting -- cleaned up PID/cwd files + watcher" -Level 'INFO' -LogFile $LogFile
    }
}