workflows/default/systems/runtime/modules/ProcessRegistry.psm1

<#
.SYNOPSIS
    Process lifecycle management module for dotbot runtime.
.DESCRIPTION
    Provides process registration, locking, activity logging, diagnostic
    logging, preflight checks, and task selection helpers.
    Extracted from launch-process.ps1 as part of v4 Phase 03 (#92).
#>


# --- Module-scope state (set via Initialize-ProcessRegistry) ---
# Every value starts as $null and is only populated by Initialize-ProcessRegistry.

# Directory holding per-process files: "<id>.json", "<id>.activity.jsonl" and "<id>.stop".
$script:ProcessesDir = $null
# Directory holding the "launch-<LockType>.lock" files.
$script:ControlDir = $null
# Stored by Initialize-ProcessRegistry; not read by any function visible in this module.
$script:DiagLogPath = $null
# Settings object; operations.file_retry_count and operations.file_retry_base_ms are read for file-write retries.
$script:Settings = $null
# Provider configuration; Test-Preflight reads its 'executable' and 'display_name' members.
$script:ProviderConfig = $null
# Path whose existence Test-Preflight verifies (reported as the ".bot" check).
$script:BotRoot = $null

function Initialize-ProcessRegistry {
    <#
    .SYNOPSIS
        Store the runtime configuration in module scope for the other ProcessRegistry functions.
    .DESCRIPTION
        Every module-scope value is overwritten on each call, including the optional
        ones: an omitted optional parameter resets its module-scope value to the
        parameter's default (empty string or $null).
    .PARAMETER ProcessesDir
        Directory for per-process files.
    .PARAMETER ControlDir
        Directory for launch lock files.
    .PARAMETER DiagLogPath
        Diagnostic log path (stored only).
    .PARAMETER Settings
        Settings object consulted for file retry tuning.
    .PARAMETER ProviderConfig
        Provider configuration consulted by Test-Preflight.
    .PARAMETER BotRoot
        Path checked by Test-Preflight.
    #>

    param(
        [Parameter(Mandatory)]
        [string]$ProcessesDir,

        [Parameter(Mandatory)]
        [string]$ControlDir,

        [string]$DiagLogPath,

        [object]$Settings,

        [object]$ProviderConfig,

        [string]$BotRoot
    )

    # Table of module-scope variable name -> incoming value.
    $moduleState = [ordered]@{
        ProcessesDir   = $ProcessesDir
        ControlDir     = $ControlDir
        DiagLogPath    = $DiagLogPath
        Settings       = $Settings
        ProviderConfig = $ProviderConfig
        BotRoot        = $BotRoot
    }

    foreach ($slot in $moduleState.GetEnumerator()) {
        Set-Variable -Scope Script -Name $slot.Key -Value $slot.Value
    }
}

function New-ProcessId {
    # Build an id of the form "proc-xxxxxx" from the first six characters of a fresh GUID.
    $guidText = [guid]::NewGuid().ToString()
    $shortTag = $guidText.Substring(0, 6)
    "proc-$shortTag"
}

function Write-ProcessFile {
    <#
    .SYNOPSIS
        Persist a process record as "<Id>.json" in the processes directory.
    .DESCRIPTION
        The JSON is written to "<Id>.json.tmp" and then moved over the final path.
        The write/move pair is retried with a linearly growing delay. The retry
        count and base delay come from Settings.operations.file_retry_count and
        Settings.operations.file_retry_base_ms, falling back to 3 and 50 ms.
        After the last failed attempt a warning is logged through Write-BotLog
        when that command is available; no exception is raised to the caller.
    .PARAMETER Id
        Process identifier used as the file name stem.
    .PARAMETER Data
        Hashtable serialised with ConvertTo-Json -Depth 10.
    #>
    param([string]$Id, [hashtable]$Data)
    $filePath = Join-Path $script:ProcessesDir "$Id.json"
    # NOTE: the temp name is derived from the target only, so two writers of the same Id share it.
    $tempFile = "$filePath.tmp"

    $retryCount = if ($script:Settings.operations.file_retry_count) { $script:Settings.operations.file_retry_count } else { 3 }
    $retryBaseMs = if ($script:Settings.operations.file_retry_base_ms) { $script:Settings.operations.file_retry_base_ms } else { 50 }
    for ($r = 0; $r -lt $retryCount; $r++) {
        try {
            $json = $Data | ConvertTo-Json -Depth 10
            # -ErrorAction Stop makes a failed temp write terminating so it reaches the
            # catch/retry path. Without it the error is non-terminating and Move-Item
            # could publish a stale leftover temp file as the new process record.
            Set-Content -Path $tempFile -Value $json -Encoding utf8NoBOM -NoNewline -ErrorAction Stop
            Move-Item -Path $tempFile -Destination $filePath -Force -ErrorAction Stop
            return
        } catch {
            # Never leave a partial temp file behind for the next attempt.
            if (Test-Path $tempFile) { Remove-Item $tempFile -Force -ErrorAction SilentlyContinue }
            if ($r -lt ($retryCount - 1)) {
                Start-Sleep -Milliseconds ($retryBaseMs * ($r + 1))
            } else {
                if (Get-Command Write-BotLog -ErrorAction SilentlyContinue) {
                    Write-BotLog -Level Warn -Message "Write-ProcessFile FAILED for $Id after $retryCount retries" -Exception $_
                }
            }
        }
    }
}

function Write-ProcessActivity {
    <#
    .SYNOPSIS
        Record an activity event for a process.
    .DESCRIPTION
        When Write-BotLog is available the event is delegated to it. Otherwise a
        compact JSON line is appended to "<Id>.activity.jsonl" in the processes
        directory, retrying with a linearly growing delay. The retry count and
        base delay come from Settings.operations.file_retry_count and
        Settings.operations.file_retry_base_ms, falling back to 3 and 50 ms.
        The fallback is best-effort: if every attempt fails the event is dropped
        silently.
    .PARAMETER Id
        Process identifier.
    .PARAMETER ActivityType
        Event type, stored as 'type' in the fallback record.
    .PARAMETER Message
        Event text.
    #>
    param([string]$Id, [string]$ActivityType, [string]$Message)
    if (Get-Command Write-BotLog -ErrorAction SilentlyContinue) {
        # Delegate to DotBotLog — handles per-process + global activity.jsonl
        Write-BotLog -Level Info -Message $Message -ProcessId $Id -Context @{ activity_type = $ActivityType }
        return
    }

    # Fallback: direct file write if DotBotLog not loaded
    $logPath = Join-Path $script:ProcessesDir "$Id.activity.jsonl"
    # Named $eventJson rather than $event: $event is a PowerShell automatic variable.
    $eventJson = @{
        timestamp = (Get-Date).ToUniversalTime().ToString("o")
        type = $ActivityType
        message = $Message
        task_id = $env:DOTBOT_CURRENT_TASK_ID
        phase = $env:DOTBOT_CURRENT_PHASE
    } | ConvertTo-Json -Compress

    $retryCount = if ($script:Settings.operations.file_retry_count) { $script:Settings.operations.file_retry_count } else { 3 }
    $retryBaseMs = if ($script:Settings.operations.file_retry_base_ms) { $script:Settings.operations.file_retry_base_ms } else { 50 }
    for ($r = 0; $r -lt $retryCount; $r++) {
        $stream = $null
        $writer = $null
        try {
            $stream = [System.IO.FileStream]::new($logPath, [System.IO.FileMode]::Append, [System.IO.FileAccess]::Write, [System.IO.FileShare]::ReadWrite)
            $writer = [System.IO.StreamWriter]::new($stream, [System.Text.UTF8Encoding]::new($false))
            $writer.WriteLine($eventJson)
            # Flush inside the try so a failed flush still goes through the retry path.
            $writer.Flush()
            break
        } catch {
            if ($r -lt ($retryCount - 1)) { Start-Sleep -Milliseconds ($retryBaseMs * ($r + 1)) }
        } finally {
            # Always release the handle, even when the write threw; a leaked handle
            # would keep the file open and could block the remaining retries.
            try {
                if ($writer) { $writer.Dispose() } elseif ($stream) { $stream.Dispose() }
            } catch {
                # Disposal failure after a failed write is not actionable here.
            }
        }
    }
}

function Test-ProcessStopSignal {
    # Reports whether a "<Id>.stop" marker file exists in the processes directory.
    param([string]$Id)
    $markerName = "$Id.stop"
    return (Test-Path (Join-Path $script:ProcessesDir $markerName))
}

function Acquire-ProcessLock {
    <#
    .SYNOPSIS
    Atomically acquire a process lock using FileMode.CreateNew.
    Returns $true if lock acquired, $false if another live process holds it.
    Automatically cleans stale locks (dead PIDs).

    .DESCRIPTION
    The lock is the file "launch-<LockType>.lock" in the control directory and
    contains the owning process id as UTF-8 text. An existing lock whose content
    cannot be resolved to a running process via Get-Process (including content
    that does not parse as an integer) is treated as stale and deleted before
    acquisition is attempted.

    $false is also returned when creation loses a race and the competing lock
    file is empty or unreadable after a 50 ms wait, or when the single retry
    after a dead winner fails.

    .PARAMETER LockType
    Suffix used to build the lock file name.
    #>

    [Diagnostics.CodeAnalysis.SuppressMessageAttribute(
        'PSUseApprovedVerbs',
        '',
        Justification = 'Acquire communicates lock semantics more clearly than the approved alternatives for this exported command.'
    )]
    param([string]$LockType)
    $lockPath = Join-Path $script:ControlDir "launch-$LockType.lock"

    # Check for existing lock and validate owner is alive
    # NOTE(review): this check-then-delete step is separate from the CreateNew below;
    # two callers cleaning the same stale lock at once may interleave — confirm acceptable.
    if (Test-Path $lockPath) {
        $lockContent = Get-Content $lockPath -Raw -ErrorAction SilentlyContinue
        if ($lockContent) {
            try {
                # Throws when the PID is not running or the content is not an integer.
                Get-Process -Id ([int]$lockContent.Trim()) -ErrorAction Stop | Out-Null
                return $false  # Lock held by a live process
            } catch {
                # Owner PID is dead — remove stale lock
                Remove-Item $lockPath -Force -ErrorAction SilentlyContinue
            }
        } else {
            # Empty or unreadable lock file: no owner can be identified, so discard it.
            Remove-Item $lockPath -Force -ErrorAction SilentlyContinue
        }
    }

    # Atomic lock acquisition: CreateNew throws if file already exists
    try {
        $fs = [System.IO.File]::Open($lockPath, [System.IO.FileMode]::CreateNew, [System.IO.FileAccess]::Write, [System.IO.FileShare]::None)
        try {
            # Record our own PID so other callers can test whether the owner is alive.
            $bytes = [System.Text.Encoding]::UTF8.GetBytes($PID.ToString())
            $fs.Write($bytes, 0, $bytes.Length)
        } finally {
            $fs.Close()
        }
        return $true
    } catch [System.IO.IOException] {
        # Another process beat us to it — verify that process is alive
        # The short pause gives the winner time to write its PID before we read it.
        Start-Sleep -Milliseconds 50
        $lockContent = Get-Content $lockPath -Raw -ErrorAction SilentlyContinue
        if ($lockContent) {
            try {
                Get-Process -Id ([int]$lockContent.Trim()) -ErrorAction Stop | Out-Null
                return $false  # Legitimate lock
            } catch {
                # Winner died immediately — clean up and retry once
                Remove-Item $lockPath -Force -ErrorAction SilentlyContinue
                try {
                    $fs = [System.IO.File]::Open($lockPath, [System.IO.FileMode]::CreateNew, [System.IO.FileAccess]::Write, [System.IO.FileShare]::None)
                    try {
                        $bytes = [System.Text.Encoding]::UTF8.GetBytes($PID.ToString())
                        $fs.Write($bytes, 0, $bytes.Length)
                    } finally {
                        $fs.Close()
                    }
                    return $true
                } catch {
                    # Any failure on the single retry is reported as "not acquired".
                    return $false
                }
            }
        }
        # Lock file exists but its content could not be read: report "not acquired".
        return $false
    }
}

# Legacy lock helpers retained for backward compatibility (these are plain functions, not aliases)
function Test-ProcessLock {
    # Returns $true when "launch-<LockType>.lock" names a running process.
    # A lock whose owner cannot be resolved is deleted and reported as not held.
    param([string]$LockType)
    $lockFile = Join-Path $script:ControlDir "launch-$LockType.lock"

    if (Test-Path $lockFile) {
        $ownerText = Get-Content $lockFile -Raw -ErrorAction SilentlyContinue
        if ($ownerText) {
            try {
                $null = Get-Process -Id ([int]$ownerText.Trim()) -ErrorAction Stop
                return $true
            } catch {
                # Owner is gone (or the content is not a PID): drop the stale lock.
                Remove-Item $lockFile -Force -ErrorAction SilentlyContinue
            }
        }
    }

    return $false
}

function Set-ProcessLock {
    # Writes the current process id into "launch-<LockType>.lock", replacing any existing content.
    param([string]$LockType)
    $lockFile = Join-Path $script:ControlDir "launch-$LockType.lock"
    $ownerText = $PID.ToString()
    Set-Content -Path $lockFile -Value $ownerText -NoNewline -Encoding utf8NoBOM
}

function Remove-ProcessLock {
    # Deletes "launch-<LockType>.lock"; a missing file is ignored.
    param([string]$LockType)
    $lockFile = Join-Path -Path $script:ControlDir -ChildPath "launch-$LockType.lock"
    Remove-Item -Path $lockFile -Force -ErrorAction SilentlyContinue
}

function Test-Preflight {
    <#
    .SYNOPSIS
        Verify that the external prerequisites of the runtime are present.
    .DESCRIPTION
        Checks, in order: git on PATH, the configured provider executable on PATH,
        existence of the bot root path, and availability of the powershell-yaml
        module. A missing or empty provider executable or bot root is reported as
        a failed check instead of being passed to Get-Command / Test-Path, which
        reject empty arguments with a parameter-binding error.
    .OUTPUTS
        Hashtable with 'passed' (boolean, $true only when every check succeeded)
        and 'checks' (array of one status string per check).
    #>
    $checks = [System.Collections.Generic.List[string]]::new()
    $allPassed = $true

    # git on PATH
    if (Get-Command git -ErrorAction SilentlyContinue) {
        $checks.Add("git: OK")
    } else {
        $checks.Add("git: MISSING - git not found on PATH")
        $allPassed = $false
    }

    # Provider CLI on PATH
    $providerExe = $script:ProviderConfig.executable
    $providerDisplay = $script:ProviderConfig.display_name
    if ([string]::IsNullOrWhiteSpace($providerExe)) {
        # ProviderConfig is optional at initialisation, so it may be absent here.
        $checks.Add("provider: MISSING - no provider executable configured")
        $allPassed = $false
    } elseif (Get-Command $providerExe -ErrorAction SilentlyContinue) {
        $checks.Add("${providerExe}: OK")
    } else {
        $checks.Add("${providerExe}: MISSING - $providerDisplay CLI not found on PATH")
        $allPassed = $false
    }

    # .bot directory exists (an unset/empty root counts as missing)
    if ($script:BotRoot -and (Test-Path $script:BotRoot)) {
        $checks.Add(".bot: OK")
    } else {
        $checks.Add(".bot: MISSING - $($script:BotRoot) not found (run 'dotbot init' first)")
        $allPassed = $false
    }

    # powershell-yaml module
    if (Get-Module -ListAvailable powershell-yaml -ErrorAction SilentlyContinue) {
        $checks.Add("powershell-yaml: OK")
    } else {
        $checks.Add("powershell-yaml: MISSING - Install with: Install-Module powershell-yaml -Scope CurrentUser")
        $allPassed = $false
    }

    # @() converts the list back to a plain array, matching the original return shape.
    return @{ passed = $allPassed; checks = @($checks) }
}

function Add-YamlFrontMatter {
    <#
    .SYNOPSIS
        Prepend a YAML front-matter block to an existing file.
    .DESCRIPTION
        Emits one 'key: "value"' line per metadata entry, sorted by key, between
        '---' delimiters, followed by a blank line and the file's previous content.
        Values are written as YAML double-quoted scalars, so backslashes, double
        quotes and line breaks are escaped; otherwise a Windows path or quoted
        text would produce malformed front matter. The file is rewritten as UTF-8
        without BOM.
    .PARAMETER FilePath
        File to rewrite in place.
    .PARAMETER Metadata
        Key/value pairs to emit; values are converted to strings.
    #>
    param([string]$FilePath, [hashtable]$Metadata)
    $header = [System.Text.StringBuilder]::new()
    [void]$header.Append("---`n")
    foreach ($key in ($Metadata.Keys | Sort-Object)) {
        $text = "$($Metadata[$key])"
        # Backslash first, so the escapes added afterwards are not doubled.
        $text = $text.Replace('\', '\\').Replace('"', '\"').Replace("`r", '\r').Replace("`n", '\n')
        [void]$header.Append("${key}: `"$text`"`n")
    }
    [void]$header.Append("---`n`n")
    $existing = Get-Content $FilePath -Raw
    ($header.ToString() + $existing) | Set-Content -Path $FilePath -Encoding utf8NoBOM -NoNewline
}

# Get-NextTodoTask: checks analysing/ for resumed tasks (answered questions), then todo/ for new tasks
function Get-NextTodoTask {
    <#
    .SYNOPSIS
        Pick the next task to analyse.
    .DESCRIPTION
        First scans the 'Analysing' pool of the task index, sorted by priority, for
        a task whose file has a non-empty questions_resolved and no pending_question
        (a task resumed after its question was answered). If none qualifies, asks
        Invoke-TaskGetNext (prefer_analysed = $false) and returns its result only
        when the task status is 'todo'.
        A task file that cannot be read or parsed is skipped; the failure is logged
        through Write-BotLog when that command is available.
    .PARAMETER Verbose
        When set, the returned resumed task also carries its descriptive fields
        (description, steps, dependencies, ...) and the flag is forwarded to
        Invoke-TaskGetNext.
    .OUTPUTS
        Hashtable with 'success', 'task' ($null when nothing is available) and
        'message' for the paths built here; otherwise whatever Invoke-TaskGetNext
        returned.
    #>
    param([switch]$Verbose)

    # Fields that older task files may lack; copied as $null when absent.
    $optionalFields = @(
        'questions_resolved', 'claude_session_id', 'needs_interview',
        'working_dir', 'external_repo', 'research_prompt'
    )

    # First priority: check for analysing tasks that came back from needs-input
    $index = Get-TaskIndex
    $resumedTasks = @($index.Analysing.Values) | Sort-Object priority
    foreach ($candidate in $resumedTasks) {
        if (-not ($candidate.file_path -and (Test-Path $candidate.file_path))) { continue }
        try {
            $content = Get-Content -Path $candidate.file_path -Raw | ConvertFrom-Json
            $hasQR = $content.PSObject.Properties['questions_resolved'] -and $content.questions_resolved -and $content.questions_resolved.Count -gt 0
            $hasPQ = $content.PSObject.Properties['pending_question'] -and $content.pending_question
            if ($hasQR -and -not $hasPQ) {
                Write-Status "Found resumed task (question answered): $($candidate.name)" -Type Info
                $taskObj = @{
                    id = $content.id
                    name = $content.name
                    status = 'analysing'
                    priority = [int]$content.priority
                    effort = $content.effort
                    category = $content.category
                    type = $content.type
                    script_path = $content.script_path
                    mcp_tool = $content.mcp_tool
                    mcp_args = $content.mcp_args
                    skip_analysis = $content.skip_analysis
                    skip_worktree = $content.skip_worktree
                }
                if ($Verbose.IsPresent) {
                    $taskObj.description = $content.description
                    $taskObj.dependencies = $content.dependencies
                    $taskObj.acceptance_criteria = $content.acceptance_criteria
                    $taskObj.steps = $content.steps
                    $taskObj.applicable_agents = $content.applicable_agents
                    $taskObj.applicable_standards = $content.applicable_standards
                    $taskObj.file_path = $candidate.file_path
                    foreach ($field in $optionalFields) {
                        $taskObj[$field] = if ($content.PSObject.Properties[$field]) { $content.$field } else { $null }
                    }
                }
                return @{
                    success = $true
                    task = $taskObj
                    message = "Resumed task (question answered): $($content.name)"
                }
            }
        } catch {
            # Guarded like the other Write-BotLog call sites in this module: without
            # DotBotLog loaded, an unguarded call here would raise a second error and
            # could abort task selection because of one unreadable file.
            if (Get-Command Write-BotLog -ErrorAction SilentlyContinue) {
                Write-BotLog -Level Warn -Message "Failed to read analysing task: $($candidate.file_path)" -Exception $_
            }
        }
    }

    # Second priority: get next todo task
    $result = Invoke-TaskGetNext -Arguments @{ prefer_analysed = $false; verbose = $Verbose.IsPresent }
    if ($result.task -and $result.task.status -eq 'todo') {
        return $result
    }

    return @{
        success = $true
        task = $null
        message = "No tasks available for analysis."
    }
}

function Get-NextWorkflowTask {
    <#
    .SYNOPSIS
        Pick the next task for a workflow runner.
    .DESCRIPTION
        First scans the 'Analysing' pool of the task index (optionally restricted
        to tasks whose 'workflow' equals WorkflowFilter), sorted by priority, for a
        task whose file has a non-empty questions_resolved and no pending_question.
        If none qualifies, returns the result of Invoke-TaskGetNext called with
        prefer_analysed = $true and, when given, workflow_filter.
        A task file that cannot be read or parsed is skipped; the failure is logged
        through Write-BotLog when that command is available.
    .PARAMETER Verbose
        When set, the returned resumed task also carries its descriptive fields and
        the flag is forwarded to Invoke-TaskGetNext.
    .PARAMETER WorkflowFilter
        Optional workflow name used to restrict candidates.
    .OUTPUTS
        Hashtable with 'success', 'task' and 'message' for a resumed task; otherwise
        whatever Invoke-TaskGetNext returned.
    #>
    param([switch]$Verbose, [string]$WorkflowFilter)

    # Fields that older task files may lack; copied as $null when absent.
    $optionalFields = @(
        'questions_resolved', 'claude_session_id', 'needs_interview',
        'working_dir', 'external_repo', 'research_prompt'
    )

    # First priority: check for analysing tasks that came back from needs-input
    $index = Get-TaskIndex
    $resumedTasks = @($index.Analysing.Values)
    if ($WorkflowFilter) {
        $resumedTasks = @($resumedTasks | Where-Object { $_.workflow -eq $WorkflowFilter })
    }
    $resumedTasks = $resumedTasks | Sort-Object priority
    foreach ($candidate in $resumedTasks) {
        if (-not ($candidate.file_path -and (Test-Path $candidate.file_path))) { continue }
        try {
            $content = Get-Content -Path $candidate.file_path -Raw | ConvertFrom-Json
            $hasQR = $content.PSObject.Properties['questions_resolved'] -and $content.questions_resolved -and $content.questions_resolved.Count -gt 0
            $hasPQ = $content.PSObject.Properties['pending_question'] -and $content.pending_question
            if ($hasQR -and -not $hasPQ) {
                Write-Status "Found resumed task (question answered): $($candidate.name)" -Type Info
                $taskObj = @{
                    id = $content.id
                    name = $content.name
                    status = 'analysing'
                    priority = [int]$content.priority
                    effort = $content.effort
                    category = $content.category
                    type = $content.type
                    script_path = $content.script_path
                    mcp_tool = $content.mcp_tool
                    mcp_args = $content.mcp_args
                    skip_analysis = $content.skip_analysis
                    skip_worktree = $content.skip_worktree
                    workflow = $content.workflow
                    model = $content.model
                }
                if ($Verbose.IsPresent) {
                    $taskObj.description = $content.description
                    $taskObj.dependencies = $content.dependencies
                    $taskObj.acceptance_criteria = $content.acceptance_criteria
                    $taskObj.steps = $content.steps
                    $taskObj.applicable_agents = $content.applicable_agents
                    $taskObj.applicable_standards = $content.applicable_standards
                    $taskObj.file_path = $candidate.file_path
                    foreach ($field in $optionalFields) {
                        $taskObj[$field] = if ($content.PSObject.Properties[$field]) { $content.$field } else { $null }
                    }
                }
                return @{
                    success = $true
                    task = $taskObj
                    message = "Resumed task (question answered): $($content.name)"
                }
            }
        } catch {
            # Guarded like the other Write-BotLog call sites in this module: without
            # DotBotLog loaded, an unguarded call here would raise a second error and
            # could abort task selection because of one unreadable file.
            if (Get-Command Write-BotLog -ErrorAction SilentlyContinue) {
                Write-BotLog -Level Warn -Message "Failed to read analysing task: $($candidate.file_path)" -Exception $_
            }
        }
    }

    # Second priority: prefer analysed tasks (ready for execution), then todo
    $wfFilterArgs = @{ prefer_analysed = $true; verbose = $Verbose.IsPresent }
    if ($WorkflowFilter) { $wfFilterArgs['workflow_filter'] = $WorkflowFilter }
    $result = Invoke-TaskGetNext -Arguments $wfFilterArgs
    return $result
}

function Test-DependencyDeadlock {
    # Returns $true (after reporting through Write-Status and the process activity log)
    # when Get-DeadlockedTasks reports at least one blocked task; otherwise $false.
    param([string]$ProcessId)

    $report = Get-DeadlockedTasks
    if (-not ($report.BlockedCount -gt 0)) {
        return $false
    }

    $blockerList = $report.BlockerNames -join ', '
    $notice = "Dependency deadlock: $($report.BlockedCount) todo task(s) are blocked by skipped prerequisite(s) [$blockerList]. Workflow cannot continue automatically — reset or re-implement the skipped tasks to unblock the queue."
    Write-Status $notice -Type Error
    Write-ProcessActivity -Id $ProcessId -ActivityType "text" -Message $notice
    return $true
}

function Test-WorkflowComplete {
    <#
    .SYNOPSIS
    Reports whether no pending task remains for the given workflow.

    .DESCRIPTION
    Looks through the non-terminal pools of the task index (Todo, Analysed,
    Analysing, InProgress, NeedsInput) and returns $false as soon as one task's
    `workflow` field equals the filter. When none matches, the workflow is
    considered complete and $true is returned, so a workflow-filtered runner can
    exit cleanly instead of polling for tasks that will never arrive.

    This addresses the "ghost runner" deadlock: a workflow task-runner (e.g. the
    kickstart-via-repo runner) that keeps waiting after its last workflow-scoped
    task finished holds workflow_alive=true in /api/state, which stops the UI's
    generic "Execute Tasks" Start button from launching a second runner for
    non-workflow tasks created during the run (e.g. gap-analysis tasks generated
    by Phase 5b).

    .PARAMETER WorkflowFilter
    The workflow name to match against each task's `workflow` field.
    #>

    param(
        [Parameter(Mandatory)]
        [string]$WorkflowFilter
    )

    $index = Get-TaskIndex
    $pendingPoolNames = 'Todo', 'Analysed', 'Analysing', 'InProgress', 'NeedsInput'

    foreach ($poolName in $pendingPoolNames) {
        foreach ($entry in @($index.$poolName.Values)) {
            if ($entry.workflow -eq $WorkflowFilter) {
                return $false
            }
        }
    }

    return $true
}

# Public surface of the module: only the functions listed here are exported.
# The list currently names every function defined in this file.
Export-ModuleMember -Function @(
    'Initialize-ProcessRegistry',
    'New-ProcessId',
    'Write-ProcessFile',
    'Write-ProcessActivity',
    'Test-ProcessStopSignal',
    'Acquire-ProcessLock',
    'Test-ProcessLock',
    'Set-ProcessLock',
    'Remove-ProcessLock',
    'Test-Preflight',
    'Add-YamlFrontMatter',
    'Get-NextTodoTask',
    'Get-NextWorkflowTask',
    'Test-DependencyDeadlock',
    'Test-WorkflowComplete'
)