Public/Start-BykaCCSupervisor.ps1
|
function Start-BykaCCSupervisor {
    <#
    .SYNOPSIS
        Claude Code Telegram-bridge supervisor.

    .DESCRIPTION
        Runs the long-running `claude --channels` process with restart-on-exit,
        per-instance state-dir separation, repo-switcher apply-watcher
        integration, sleep prevention, and crash-loop backoff. Designed to be
        launched at user login via a minimized PowerShell 7 window (see
        New-BykaCCSupervisorLauncher).

        A session that ends in under 30 seconds counts as a quick failure.
        Quick failures back off exponentially (RestartDelay * 2^(n-1), capped
        at 300 seconds); after 10 consecutive quick failures the supervisor
        stops. Any session that runs 30 seconds or longer resets the counter.

        On every exit path after supervisor.pid has been written (normal
        return, terminating error, Ctrl+C) the apply-watcher is stopped, the
        supervisor.pid / supervisor.cwd files are removed, and the
        sleep-prevention request is released.

        CRITICAL: Claude CLI requires a REAL interactive terminal.
          - Do NOT pipe stdout/stderr (breaks TTY detection)
          - Do NOT use -WindowStyle Hidden (no terminal allocated)
          - ONLY use -WindowStyle Minimized (real terminal, just minimized)

    .PARAMETER FallbackProjectDir
        Absolute path to the project directory used when active-project.txt is
        missing/invalid. Must be a real directory under -BaseDir. Required.

    .PARAMETER BaseDir
        The trusted base directory under which all project dirs must live.
        Defaults to the parent of -FallbackProjectDir (e.g. C:\Dev when
        FallbackProjectDir is C:\Dev\byka). Used to validate active-project.txt
        entries against the Dev-root prefix.

    .PARAMETER StateDir
        Per-instance state directory.
        Defaults to "$env:USERPROFILE\.claude\channels\telegram-cli".

    .PARAMETER TelegramPlugin
        Claude Code Telegram plugin identifier.
        Defaults to 'plugin:telegram@claude-plugins-official'.

    .PARAMETER RepoSwitcherRoot
        Where the @kagdaci/repo-switcher npm package is installed (used to
        locate ps/apply-watcher.ps1).
        Defaults to "$env:APPDATA\npm\node_modules\@kagdaci\repo-switcher".

    .PARAMETER RepoNameAllowlist
        Regex used to validate the leaf name of active-project.txt entries.
        Defaults to '^[A-Za-z0-9_\-\.]{1,64}$'.

    .PARAMETER RestartDelay
        Seconds to wait before restarting after a session that ran at least
        30 seconds. Also the base of the exponential backoff applied to quick
        failures. Valid range 1-3600. Default 5.

    .PARAMETER PwshPath
        Path of the PowerShell executable handed to Start-ApplyWatcher.
        Defaults to the executable of the current process.

    .EXAMPLE
        Start-BykaCCSupervisor -FallbackProjectDir 'C:\Dev\byka'

    .EXAMPLE
        Start-BykaCCSupervisor -FallbackProjectDir 'D:\Code\primary' -BaseDir 'D:\Code'
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [ValidateNotNullOrEmpty()]
        [ValidateScript({ Test-Path $_ -PathType Container }, ErrorMessage = "FallbackProjectDir must exist as a directory: '{0}'")]
        [string]$FallbackProjectDir,

        [ValidateNotNullOrEmpty()]
        [string]$BaseDir = $(Split-Path -Parent $FallbackProjectDir),

        [ValidateNotNullOrEmpty()]
        [string]$StateDir = "$env:USERPROFILE\.claude\channels\telegram-cli",

        [ValidateNotNullOrEmpty()]
        [string]$TelegramPlugin = 'plugin:telegram@claude-plugins-official',

        [ValidateNotNullOrEmpty()]
        [string]$RepoSwitcherRoot = "$env:APPDATA\npm\node_modules\@kagdaci\repo-switcher",

        [ValidateNotNullOrEmpty()]
        [string]$RepoNameAllowlist = '^[A-Za-z0-9_\-\.]{1,64}$',

        [ValidateRange(1, 3600)]
        [int]$RestartDelay = 5,

        [ValidateNotNullOrEmpty()]
        [string]$PwshPath = (Get-Process -Id $PID).Path
    )

    # [G4] security F2: $BaseDir may have been derived from a -FallbackProjectDir
    # that's a root path (e.g. 'C:\') -- Split-Path -Parent returns ''. Reject
    # explicitly so the prefix regex doesn't collapse to '^\\[^\\]+'.
    # (Validation attributes are not applied to default values, hence the
    # manual check.)
    if ([string]::IsNullOrWhiteSpace($BaseDir)) {
        throw "BaseDir is empty -- FallbackProjectDir '$FallbackProjectDir' may be a root path. Pass -BaseDir explicitly."
    }

    # [G4] security F6 / qa F1: reset the dedup-warn state on every supervisor
    # entry so repeated invocations in the same PS session start clean.
    $script:LastResolveWarn = ''

    # [G4] qa F5: $Host.UI.RawUI is $null in non-interactive hosts (CI,
    # pwsh -NonInteractive). Best-effort -- the title is a UX nicety, not
    # load-bearing for supervisor function.
    try { $Host.UI.RawUI.WindowTitle = "Claude Code Channels - CLI bot (Bot Y)" } catch {}

    # Per-instance state dir -- CLI uses Bot Y here (TELEGRAM_TASKS_BOT_TOKEN).
    $env:TELEGRAM_STATE_DIR = $StateDir

    $LogFile            = Join-Path $StateDir 'channels.log'
    $ActiveProjectFile  = "$env:USERPROFILE\.claude\channels\active-project.txt"
    $SupervisorPidFile  = "$env:USERPROFILE\.claude\channels\supervisor.pid"
    $SupervisorCwdFile  = "$env:USERPROFILE\.claude\channels\supervisor.cwd"
    $WatcherPidFile     = "$env:USERPROFILE\.claude\channels\watcher.pid"
    $ApplyWatcherScript = Join-Path $RepoSwitcherRoot 'ps\apply-watcher.ps1'

    # SetThreadExecutionState flags. Kept in decimal because a 0x8000000N
    # literal parses as a negative Int32 in PowerShell.
    $KeepAwakeOn  = [uint32]2147483649   # 0x80000001 = ES_CONTINUOUS | ES_SYSTEM_REQUIRED
    $KeepAwakeOff = [uint32]2147483648   # 0x80000000 = ES_CONTINUOUS alone (clears the request)

    # Argument sets shared by the initial call and the per-iteration call.
    $resolveArgs = @{
        File      = $ActiveProjectFile
        Fallback  = $FallbackProjectDir
        BaseDir   = $BaseDir
        Allowlist = $RepoNameAllowlist
        LogFile   = $LogFile
    }
    $watcherArgs = @{
        ApplyWatcherScript = $ApplyWatcherScript
        WatcherPidFile     = $WatcherPidFile
        LogFile            = $LogFile
        PwshPath           = $PwshPath
        RepoSwitcherRoot   = $RepoSwitcherRoot
    }

    # Ensure state dir exists.
    if (-not (Test-Path $StateDir)) {
        New-Item -ItemType Directory -Path $StateDir -Force | Out-Null
    }

    # Log rotation -- keep current log under 5MB.
    if (Test-Path $LogFile) {
        $sz = (Get-Item $LogFile).Length / 1MB
        if ($sz -gt 5) {
            Move-Item $LogFile "$LogFile.old" -Force
        }
    }

    Write-LogLine -Message "========== Service starting (CLI bot via STATE_DIR=$StateDir, BaseDir=$BaseDir, Fallback=$FallbackProjectDir) ==========" -Level 'START' -LogFile $LogFile

    # Kill ONLY our prior CLI bun, not VS Code's.
    Stop-OurStaleBun -StateDir $StateDir -LogFile $LogFile

    # Check for existing channels process (filtered to non-VS-Code).
    $existing = Get-ChannelsProcess
    if ($existing) {
        Write-LogLine -Message "Already running (PID $($existing.ProcessId)). Exiting." -Level 'WARN' -LogFile $LogFile
        return
    }

    # Add Bun to PATH -- only once, so repeated invocations in the same
    # session do not keep growing the variable.
    $bunBin = "$env:USERPROFILE\.bun\bin"
    if (($env:PATH -split ';') -notcontains $bunBin) {
        $env:PATH += ";$bunBin"
    }

    # Check claude on PATH.
    if (-not (Get-Command claude -ErrorAction SilentlyContinue)) {
        Write-LogLine -Message "claude not in PATH -- exiting in 10s (no manual Enter required)" -Level 'ERROR' -LogFile $LogFile
        Start-Sleep -Seconds 10
        return
    }

    Write-FileAtomic -Path $SupervisorPidFile -Content $PID
    Write-LogLine -Message "Wrote supervisor.pid=$PID" -Level 'INFO' -LogFile $LogFile

    # Everything from here on runs under try/finally so that the PID/cwd files,
    # the watcher and the sleep-prevention request are cleaned up even when
    # setup (not just the retry loop) is interrupted or throws.
    try {
        Start-ApplyWatcher @watcherArgs

        # Initial project dir (re-resolved on every supervisor iteration).
        $ProjectDir = Resolve-ProjectDir @resolveArgs
        Set-Location $ProjectDir
        Write-LogLine -Message "Working dir: $ProjectDir (resolved from active-project.txt; fallback=$FallbackProjectDir)" -Level 'INFO' -LogFile $LogFile

        # Load long-lived Anthropic OAuth token from the neutral channels/ location.
        # NOT a Telegram token -- it's Anthropic API auth shared across surfaces.
        $TokenFile = "$env:USERPROFILE\.claude\channels\.token"
        if (Test-Path $TokenFile) {
            $tokenText = Get-Content $TokenFile -Raw
            if ([string]::IsNullOrWhiteSpace($tokenText)) {
                # An empty file yields $null/''; calling .Trim() on it would
                # error or clear the variable while still reporting success.
                Write-LogLine -Message ".token file is empty -- falling back to OAuth (may expire)" -Level 'WARN' -LogFile $LogFile
            }
            else {
                $env:ANTHROPIC_API_KEY = $tokenText.Trim()
                Write-LogLine -Message "Loaded long-lived Anthropic token from .token file" -Level 'INFO' -LogFile $LogFile
            }
        }
        else {
            Write-LogLine -Message "No .token file found -- falling back to OAuth (may expire)" -Level 'WARN' -LogFile $LogFile
        }

        # Prevent sleep (non-fatal if it fails).
        # [G4] qa F2: Add-Type with a fixed -Namespace/-Name throws "type exists"
        # on re-import within the same process. Guard with type-presence check.
        try {
            if (-not ([System.Management.Automation.PSTypeName]'Win32.KeepAwake').Type) {
                Add-Type -MemberDefinition '[DllImport("kernel32.dll")] public static extern uint SetThreadExecutionState(uint f);' -Namespace Win32 -Name KeepAwake -ErrorAction Stop
            }
            [Win32.KeepAwake]::SetThreadExecutionState($KeepAwakeOn) | Out-Null
            Write-LogLine -Message "Sleep prevention enabled" -Level 'INFO' -LogFile $LogFile
        }
        catch {
            Write-LogLine -Message "Sleep prevention failed (non-fatal): $_" -Level 'WARN' -LogFile $LogFile
        }

        # Retry loop.
        $fails   = 0
        $session = 0
        while ($true) {
            $session++

            # Health-check watcher; respawn if missing/dead.
            if (-not (Test-WatcherAlive -WatcherPidFile $WatcherPidFile)) {
                Write-LogLine -Message "Watcher dead/missing -- respawning" -Level 'WARN' -LogFile $LogFile
                Start-ApplyWatcher @watcherArgs
            }

            # Re-resolve project dir on every iteration so /switch-repo's file
            # write is picked up after the previous claude session exits.
            $NewProjectDir = Resolve-ProjectDir @resolveArgs
            if ($NewProjectDir -ne $ProjectDir) {
                Write-LogLine -Message "#$session Project dir changed: $ProjectDir -> $NewProjectDir (active-project.txt updated)" -Level 'INFO' -LogFile $LogFile
                # TOCTOU defense: between Resolve-ProjectDir validating + Set-Location
                # executing, the dir could vanish. Stay in the old cwd on failure.
                try {
                    Set-Location -Path $NewProjectDir -ErrorAction Stop
                    $ProjectDir = $NewProjectDir
                }
                catch {
                    Write-LogLine -Message "#$session Set-Location to '$NewProjectDir' FAILED: $($_.Exception.Message). Staying in $ProjectDir." -Level 'ERROR' -LogFile $LogFile
                }
            }

            # Snapshot launch cwd for apply-pending.js validation. Written AFTER
            # the re-resolve above so the file records the directory claude is
            # actually launched in, not the one from the previous session.
            Write-FileAtomic -Path $SupervisorCwdFile -Content $ProjectDir

            # Snapshot bun PIDs BEFORE launching claude -- anything new = ours.
            $preBun = @(Get-Process bun -ErrorAction SilentlyContinue | ForEach-Object { $_.Id })

            # Re-check duplicate (could have been launched by another window).
            $dup = Get-ChannelsProcess
            if ($dup) {
                Write-LogLine -Message "#$session Another channels process found (PID $($dup.ProcessId)). Waiting 60s." -Level 'WARN' -LogFile $LogFile
                Start-Sleep 60
                continue
            }

            $t0 = Get-Date
            Write-LogLine -Message "#$session Launching claude --channels (Bot Y via STATE_DIR=$StateDir, cwd=$ProjectDir)" -Level 'START' -LogFile $LogFile

            # Run claude directly -- NO piping, NO redirection.
            claude --channels $TelegramPlugin
            $code = $LASTEXITCODE

            # Bug 2 fix (2026-05-12, preserved from start-channels.ps1): drain
            # any buffered keystrokes left over from the prior claude --channels
            # invocation. Without this, the NEXT claude session can appear
            # "frozen" until the user presses Enter -- claude's raw-mode stdin
            # handling on exit leaves the console with buffered EOL/EOF events
            # that leak into the next invocation. Best-effort: silently no-ops
            # when stdin isn't a real console (script reload, redirect, etc.).
            try {
                while ([Console]::KeyAvailable) {
                    [void][Console]::ReadKey($true)
                }
            }
            catch {
                # Non-interactive context -- safe to ignore.
            }

            $dur = (Get-Date) - $t0
            Write-LogLine -Message "#$session Ended (exit=$code, ran=$([int]$dur.TotalSeconds)s)" -Level 'WARN' -LogFile $LogFile

            # Kill ONLY bun processes that are new since pre-snapshot -- these are ours.
            # VS Code's bun (Bot X) is in $preBun and stays untouched.
            # NOTE(review): any bun process started by anyone while the session
            # was running is absent from $preBun and is therefore killed too.
            $ourBun = @(Get-Process bun -ErrorAction SilentlyContinue | Where-Object { $_.Id -notin $preBun })
            if ($ourBun.Count -gt 0) {
                $ourBun | Stop-Process -Force -ErrorAction SilentlyContinue
                Write-LogLine -Message "Killed $($ourBun.Count) new-since-launch bun process(es); VS Code's bun untouched" -Level 'INFO' -LogFile $LogFile
            }

            if ($dur.TotalSeconds -lt 30) {
                $fails++
                if ($fails -ge 10) {
                    Write-LogLine -Message "10 quick failures. Stopping. Exiting in 10s (no manual Enter required)." -Level 'ERROR' -LogFile $LogFile
                    # Sleep prevention is released in the finally block below.
                    Start-Sleep -Seconds 10
                    return
                }
                # Exponential backoff: RestartDelay, 2x, 4x, ... capped at 300s.
                $wait = [math]::Min($RestartDelay * [math]::Pow(2, $fails - 1), 300)
                Write-LogLine -Message "Quick fail #$fails. Backoff ${wait}s." -Level 'ERROR' -LogFile $LogFile
                Start-Sleep $wait
            }
            else {
                $fails = 0
                Write-LogLine -Message "Restarting in ${RestartDelay}s..." -Level 'INFO' -LogFile $LogFile
                Start-Sleep $RestartDelay
            }

            # Re-assert the keep-awake request before the next session.
            try { [Win32.KeepAwake]::SetThreadExecutionState($KeepAwakeOn) | Out-Null } catch {}
        }
    }
    finally {
        # Release the keep-awake request on every exit path; best-effort because
        # the Win32.KeepAwake type may never have been loaded.
        try { [Win32.KeepAwake]::SetThreadExecutionState($KeepAwakeOff) | Out-Null } catch {}

        Stop-ApplyWatcher -WatcherPidFile $WatcherPidFile -LogFile $LogFile
        Remove-Item -LiteralPath $SupervisorPidFile, $SupervisorCwdFile -ErrorAction SilentlyContinue
        Write-LogLine -Message "Supervisor exiting -- cleaned up PID/cwd files + watcher" -Level 'INFO' -LogFile $LogFile
    }
}