Public/Repair-ResolvedBackfill.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Repair-ResolvedBackfill {
    <#
    .SYNOPSIS
        Backfills key_points from resolved unmapped concepts into pov_summaries.
    .DESCRIPTION
        When unmapped concepts are resolved (matched to taxonomy nodes via
        Repair-UnmappedConcepts), they receive a resolved_node_id but are NOT
        automatically promoted into the pov_summaries key_points arrays.

        This cmdlet scans every summary's unmapped_concepts for entries that
        have a non-blank resolved_node_id, checks whether that node already
        appears in the corresponding POV's key_points, and if not, creates a
        new key_point entry with stance "aligned", extraction_confidence 0.7
        and excerpt_context "unmapped_concept_backfill".

        POV mapping:
          - suggested_pov "accelerationist" -> pov_summaries.accelerationist.key_points
          - suggested_pov "safetyist"       -> pov_summaries.safetyist.key_points
          - suggested_pov "skeptic"         -> pov_summaries.skeptic.key_points
          - suggested_pov "situations"      -> SKIPPED (no key_points section)
          - suggested_pov "cross_cutting"   -> SKIPPED (no pov_summaries section)
          - suggested_pov missing or empty  -> SKIPPED (reason "missing_pov")
          - any other value                 -> SKIPPED (reason "unknown_pov")

        Also scans factual_claims to identify candidates whose
        linked_taxonomy_nodes share a POV family prefix with resolved concepts.
        These are logged for manual review but not auto-modified.
    .PARAMETER DocId
        Wildcard pattern matched against each summary file's base name to limit
        which summaries are processed.
        Default: '*' (all summaries).
    .PARAMETER WhatIf
        Show what would be changed without writing files.
    .OUTPUTS
        PSCustomObject with the properties:
          - Backfilled      : one record per concept selected for backfill
          - Skipped         : one record per skipped concept, with a Reason
          - ClaimCandidates : factual_claims flagged for manual review
          - Statistics      : the run's counters (TotalBackfilled,
                              TotalAlreadyLinked, TotalSkippedPov,
                              TotalSkippedNoPov, FilesModified)
        Nothing is returned when the summaries directory does not exist or no
        file matches DocId.
    .NOTES
        The Backfilled list and the BACKFILL log line are produced for every
        concept that qualifies, whether or not ShouldProcess approved the
        change (for example under -WhatIf). Statistics.TotalBackfilled and
        Statistics.FilesModified count only changes that were actually applied.
    .EXAMPLE
        Repair-ResolvedBackfill
        # Process all summaries.
    .EXAMPLE
        Repair-ResolvedBackfill -DocId '*constitution*'
        # Process only matching summaries.
    .EXAMPLE
        Repair-ResolvedBackfill -WhatIf
        # Preview changes without modifying files.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param(
        [string]$DocId = '*'
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    $SummariesDir = Get-SummariesDir

    if (-not (Test-Path $SummariesDir)) {
        Write-Fail "Summaries directory not found: $SummariesDir"
        return
    }

    $SummaryFiles = @(Get-ChildItem -Path $SummariesDir -Filter '*.json' -File |
        Where-Object { $_.BaseName -like $DocId })

    if ($SummaryFiles.Count -eq 0) {
        Write-Warn "No summary files matched pattern '$DocId'"
        return
    }

    # Valid POV names that map to pov_summaries sections
    $ValidPovs = @('accelerationist', 'safetyist', 'skeptic')

    # POV family prefixes for factual_claims candidate matching
    $PovPrefixes = @{
        accelerationist = 'acc-'
        safetyist       = 'saf-'
        skeptic         = 'skp-'
    }

    Write-Step "Scanning $($SummaryFiles.Count) summary file(s) for resolved unmapped concepts"
    Write-Info "Action: For each resolved unmapped concept, check if its node already"
    Write-Info " appears in pov_summaries key_points. If not, create a new entry."
    Write-Info ""

    $TotalBackfilled    = 0
    $TotalAlreadyLinked = 0
    $TotalSkippedPov    = 0
    $TotalSkippedNoPov  = 0
    $FilesModified      = 0
    $AllBackfills       = [System.Collections.Generic.List[PSObject]]::new()
    $AllSkipped         = [System.Collections.Generic.List[PSObject]]::new()
    $AllClaimCandidates = [System.Collections.Generic.List[PSObject]]::new()

    # Keys (DocId / ClaimLabel / ConceptNodeId) of candidates already recorded, so
    # de-duplication is a set lookup instead of a rescan of $AllClaimCandidates.
    # Case-insensitive to approximate the -eq comparison this replaces.
    $SeenCandidateKeys = [System.Collections.Generic.HashSet[string]]::new(
        [System.StringComparer]::OrdinalIgnoreCase)

    foreach ($File in $SummaryFiles) {
        try {
            $Summary = Get-Content -Raw -Path $File.FullName | ConvertFrom-Json
        }
        catch {
            Write-Warn "Failed to parse $($File.Name): $_"
            continue
        }

        # An empty file or a literal JSON null yields $null; under strict mode any
        # member access on it would throw and abort the whole run. Such a file has
        # no unmapped concepts, so skip it like any other file with nothing to do.
        if ($null -eq $Summary) { continue }

        $DocName = $File.BaseName

        # ── Check for unmapped_concepts with resolved_node_id ────────────
        $HasUnmapped = $Summary.PSObject.Properties['unmapped_concepts']
        if (-not $HasUnmapped -or -not $HasUnmapped.Value) { continue }
        $Unmapped = @($HasUnmapped.Value)
        if ($Unmapped.Count -eq 0) { continue }

        # Filter to only resolved concepts
        $ResolvedConcepts = @($Unmapped | Where-Object {
            $_.PSObject.Properties['resolved_node_id'] -and
            -not [string]::IsNullOrWhiteSpace($_.resolved_node_id)
        })
        if ($ResolvedConcepts.Count -eq 0) { continue }

        # ── Ensure pov_summaries exists ──────────────────────────────────
        $HasPovSummaries = $Summary.PSObject.Properties['pov_summaries']
        if (-not $HasPovSummaries -or -not $HasPovSummaries.Value) {
            Write-Warn "$DocName — no pov_summaries section, skipping $($ResolvedConcepts.Count) resolved concept(s)"
            continue
        }
        $PovSummaries = $Summary.pov_summaries

        # ── Build a set of existing key_point taxonomy_node_ids per POV ──
        $ExistingNodeIds = @{}
        foreach ($Pov in $ValidPovs) {
            $ExistingNodeIds[$Pov] = [System.Collections.Generic.HashSet[string]]::new()
            if ($PovSummaries.PSObject.Properties[$Pov] -and $PovSummaries.$Pov) {
                $PovSection = $PovSummaries.$Pov
                if ($PovSection.PSObject.Properties['key_points'] -and $PovSection.key_points) {
                    foreach ($Kp in @($PovSection.key_points)) {
                        if ($Kp.PSObject.Properties['taxonomy_node_id'] -and $Kp.taxonomy_node_id) {
                            $null = $ExistingNodeIds[$Pov].Add($Kp.taxonomy_node_id)
                        }
                    }
                }
            }
        }

        # ── Collect factual_claims for candidate matching ────────────────
        $FactualClaims = @()
        if ($Summary.PSObject.Properties['factual_claims'] -and $Summary.factual_claims) {
            $FactualClaims = @($Summary.factual_claims)
        }

        $FileModified = $false
        $FileBackfillCount = 0

        foreach ($Concept in $ResolvedConcepts) {
            # Optional fields are read through PSObject.Properties because strict
            # mode throws on access to a property that does not exist.
            $NodeId       = $Concept.resolved_node_id
            $SuggestedPov = if ($Concept.PSObject.Properties['suggested_pov']) { $Concept.suggested_pov } else { $null }
            $ConceptText  = if ($Concept.PSObject.Properties['concept']) { $Concept.concept } else { '' }
            $Category     = if ($Concept.PSObject.Properties['suggested_category']) { $Concept.suggested_category } else { 'Beliefs' }
            $Label        = if ($Concept.PSObject.Properties['suggested_label']) { $Concept.suggested_label } else { '' }

            # ── Skip concepts with no suggested_pov at all ───────────────
            # Reported separately from 'situations' so the log and the Skipped
            # record state the real cause instead of a POV the concept never had.
            if (-not $SuggestedPov) {
                $TotalSkippedNoPov++
                $null = $AllSkipped.Add([PSCustomObject]@{
                    DocId     = $DocName
                    NodeId    = $NodeId
                    Pov       = $SuggestedPov
                    Label     = $Label
                    Reason    = 'missing_pov'
                })
                Write-Warn "$DocName — SKIP '$Label' ($NodeId): suggested_pov is missing or empty"
                continue
            }

            # ── Skip non-POV concepts (situations, cross_cutting) ────────
            if ($SuggestedPov -eq 'situations') {
                $TotalSkippedPov++
                $null = $AllSkipped.Add([PSCustomObject]@{
                    DocId     = $DocName
                    NodeId    = $NodeId
                    Pov       = $SuggestedPov
                    Label     = $Label
                    Reason    = 'situations_pov'
                })
                Write-Info "$DocName — SKIP '$Label' ($NodeId): suggested_pov is 'situations' (no key_points section)"
                continue
            }

            if ($SuggestedPov -eq 'cross_cutting' -or $SuggestedPov -eq 'cross-cutting') {
                $TotalSkippedPov++
                $null = $AllSkipped.Add([PSCustomObject]@{
                    DocId     = $DocName
                    NodeId    = $NodeId
                    Pov       = $SuggestedPov
                    Label     = $Label
                    Reason    = 'cross_cutting_pov'
                })
                Write-Info "$DocName — SKIP '$Label' ($NodeId): suggested_pov is '$SuggestedPov' (no pov_summaries section)"
                continue
            }

            # ── Validate the POV section exists ──────────────────────────
            if ($SuggestedPov -notin $ValidPovs) {
                $TotalSkippedNoPov++
                $null = $AllSkipped.Add([PSCustomObject]@{
                    DocId     = $DocName
                    NodeId    = $NodeId
                    Pov       = $SuggestedPov
                    Label     = $Label
                    Reason    = 'unknown_pov'
                })
                Write-Warn "$DocName — SKIP '$Label' ($NodeId): unknown suggested_pov '$SuggestedPov'"
                continue
            }

            if (-not $PovSummaries.PSObject.Properties[$SuggestedPov] -or -not $PovSummaries.$SuggestedPov) {
                $TotalSkippedNoPov++
                $null = $AllSkipped.Add([PSCustomObject]@{
                    DocId     = $DocName
                    NodeId    = $NodeId
                    Pov       = $SuggestedPov
                    Label     = $Label
                    Reason    = 'pov_section_missing'
                })
                Write-Warn "$DocName — SKIP '$Label' ($NodeId): POV section '$SuggestedPov' not found in pov_summaries"
                continue
            }

            # ── Check if node already exists in key_points ───────────────
            if ($ExistingNodeIds[$SuggestedPov].Contains($NodeId)) {
                $TotalAlreadyLinked++
                Write-Info "$DocName — ALREADY LINKED '$Label' ($NodeId) in $SuggestedPov key_points"
                continue
            }

            # ── Create the new key_point entry ───────────────────────────
            $NewKeyPoint = [PSCustomObject]@{
                stance                = 'aligned'
                taxonomy_node_id      = $NodeId
                category              = $Category
                point                 = $ConceptText
                verbatim              = $null
                excerpt_context       = 'unmapped_concept_backfill'
                extraction_confidence = 0.7
                vocabulary_terms      = @()
            }

            if ($PSCmdlet.ShouldProcess("$($File.Name) [$SuggestedPov]", "Add key_point for $NodeId '$Label'")) {
                $PovSection = $PovSummaries.$SuggestedPov

                # Ensure key_points array exists
                if (-not $PovSection.PSObject.Properties['key_points'] -or $null -eq $PovSection.key_points) {
                    $PovSection | Add-Member -NotePropertyName 'key_points' -NotePropertyValue @() -Force
                }

                # Append the new key_point — wrap existing in @() so a single
                # (scalar) key_point is still treated as a one-element array.
                $CurrentPoints = @($PovSection.key_points)
                $PovSection.key_points = @($CurrentPoints) + @($NewKeyPoint)

                # Track the new node ID so we don't duplicate within this file
                $null = $ExistingNodeIds[$SuggestedPov].Add($NodeId)

                $FileModified = $true
                $FileBackfillCount++
                $TotalBackfilled++
            }

            # Recorded and logged regardless of the ShouldProcess outcome, so a
            # -WhatIf run still reports which concepts qualify for backfill.
            $null = $AllBackfills.Add([PSCustomObject]@{
                DocId    = $DocName
                NodeId   = $NodeId
                Pov      = $SuggestedPov
                Category = $Category
                Label    = $Label
            })

            Write-OK "$DocName — BACKFILL '$Label' ($NodeId) -> $SuggestedPov.key_points"

            # ── Factual claims candidate matching ────────────────────────
            # Find claims that share a POV family prefix with this concept's node
            if ($PovPrefixes.ContainsKey($SuggestedPov)) {
                $Prefix = $PovPrefixes[$SuggestedPov]
                foreach ($Claim in $FactualClaims) {
                    if (-not $Claim.PSObject.Properties['linked_taxonomy_nodes'] -or -not $Claim.linked_taxonomy_nodes) {
                        continue
                    }
                    $LinkedNodes = @($Claim.linked_taxonomy_nodes)
                    $SharesFamily = $false
                    foreach ($LinkedNode in $LinkedNodes) {
                        # Node IDs are identifiers, so match the prefix ordinally
                        # rather than with the current culture's rules.
                        if ($LinkedNode -is [string] -and
                            $LinkedNode.StartsWith($Prefix, [System.StringComparison]::Ordinal)) {
                            $SharesFamily = $true
                            break
                        }
                    }
                    if (-not $SharesFamily) { continue }

                    $ClaimLabel = if ($Claim.PSObject.Properties['claim_label']) { $Claim.claim_label } else { '' }
                    $ClaimText  = if ($Claim.PSObject.Properties['claim']) { $Claim.claim } else { '' }

                    # Avoid duplicate candidate entries: HashSet.Add returns $false
                    # when the (doc, claim, concept node) key was already recorded.
                    # The NUL separator keeps distinct field triples from colliding.
                    $CandidateKey = @($DocName, $ClaimLabel, $NodeId) -join "`0"
                    if ($SeenCandidateKeys.Add($CandidateKey)) {
                        $null = $AllClaimCandidates.Add([PSCustomObject]@{
                            DocId           = $DocName
                            ClaimLabel      = $ClaimLabel
                            ClaimText       = $ClaimText
                            LinkedNodes     = ($LinkedNodes -join ', ')
                            ConceptNodeId   = $NodeId
                            ConceptLabel    = $Label
                            PovFamily       = $SuggestedPov
                        })
                    }
                }
            }
        }

        # ── Write modified file ──────────────────────────────────────────
        # Only reached with $FileModified set when at least one change was
        # approved by ShouldProcess, so -WhatIf never rewrites a file.
        if ($FileModified) {
            $Json = $Summary | ConvertTo-Json -Depth 20
            Write-Utf8NoBom -Path $File.FullName -Value $Json
            $FilesModified++
            Write-Info "$DocName — wrote $FileBackfillCount new key_point(s)"
        }
    }

    # ── Summary output ───────────────────────────────────────────────────
    Write-Step "Backfill complete"
    Write-OK   "$TotalBackfilled key_point(s) backfilled across $FilesModified file(s)"

    if ($TotalAlreadyLinked -gt 0) {
        Write-Info "$TotalAlreadyLinked resolved concept(s) already had key_points — no action needed"
    }
    if ($TotalSkippedPov -gt 0) {
        Write-Info "$TotalSkippedPov concept(s) skipped — suggested_pov was 'situations' or 'cross_cutting' (no key_points section)"
    }
    if ($TotalSkippedNoPov -gt 0) {
        Write-Warn "$TotalSkippedNoPov concept(s) skipped — POV section missing or unrecognized"
    }

    # ── Factual claims candidate report ──────────────────────────────────
    if ($AllClaimCandidates.Count -gt 0) {
        Write-Step "Factual claims candidates for manual review"
        Write-Info "The following factual_claims share a POV-family prefix with backfilled concepts."
        Write-Info "They may benefit from linked_taxonomy_nodes updates. Review manually."
        Write-Info ""
        foreach ($Candidate in $AllClaimCandidates) {
            Write-Info "$($Candidate.DocId):"
            Write-Info " Claim: $($Candidate.ClaimLabel)"
            Write-Info " Current linked nodes: $($Candidate.LinkedNodes)"
            Write-Info " Related concept node: $($Candidate.ConceptNodeId) ($($Candidate.ConceptLabel))"
            Write-Info ""
        }
        Write-Info "$($AllClaimCandidates.Count) candidate claim(s) identified for review"
    }

    # ── Return results ───────────────────────────────────────────────────
    return [PSCustomObject]@{
        Backfilled      = $AllBackfills
        Skipped         = $AllSkipped
        ClaimCandidates = $AllClaimCandidates
        Statistics      = [PSCustomObject]@{
            TotalBackfilled    = $TotalBackfilled
            TotalAlreadyLinked = $TotalAlreadyLinked
            TotalSkippedPov    = $TotalSkippedPov
            TotalSkippedNoPov  = $TotalSkippedNoPov
            FilesModified      = $FilesModified
        }
    }
}