Public/Test-TaxonomyIntegrity.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Test-TaxonomyIntegrity {
    <#
    .SYNOPSIS
        Validate taxonomy data integrity across all files.
    .DESCRIPTION
        Checks:
        - All policy_id references resolve to registry entries
        - All registry entries are referenced by at least one node
        - member_count and source_povs are accurate
        - No duplicate policy_id references within a single node
        - Edge source/target IDs resolve to existing nodes or policies
        - Embeddings exist for all nodes and policies
    .PARAMETER Detailed
        Show per-issue details instead of just counts.
    .PARAMETER PassThru
        Return a summary object.
    .PARAMETER Repair
        Auto-fix all repairable issues (dangling children, parent refs, situation refs, bad edges).
    .EXAMPLE
        Test-TaxonomyIntegrity
    .EXAMPLE
        Test-TaxonomyIntegrity -Detailed
    .EXAMPLE
        Test-TaxonomyIntegrity -Repair
    #>

    [CmdletBinding()]
    param(
        [switch]$Detailed,
        [switch]$PassThru,
        [switch]$Repair
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    $TaxDir = Get-TaxonomyDir
    $Issues = [System.Collections.Generic.List[PSCustomObject]]::new()
    $Checks = 0
    $Passed = 0

    # ── Load all data ──
    $PovFiles = @('accelerationist', 'safetyist', 'skeptic', 'situations')
    $AllNodeIds = [System.Collections.Generic.HashSet[string]]::new()
    $PovNodeIds = [System.Collections.Generic.HashSet[string]]::new()
    $PolicyRefs = @{}          # policy_id -> list of node_ids
    $DuplicateRefs = @()       # nodes with duplicate policy_id refs
    $MissingPolicyId = @()     # policy_actions without policy_id
    $ActualPovs = @{}          # policy_id -> set of povs
    $ActualCounts = @{}        # policy_id -> count
    $LoadedFiles = @{}         # povKey -> { Path, Data }
    $Dirty = @{}               # povKey -> $true if modified

    foreach ($PovKey in $PovFiles) {
        $FilePath = Join-Path $TaxDir "$PovKey.json"
        if (-not (Test-Path $FilePath)) { continue }
        $FileData = Get-Content -Raw -Path $FilePath | ConvertFrom-Json
        $LoadedFiles[$PovKey] = @{ Path = $FilePath; Data = $FileData }

        foreach ($Node in $FileData.nodes) {
            [void]$AllNodeIds.Add($Node.id)
            if ($PovKey -ne 'situations') { [void]$PovNodeIds.Add($Node.id) }

            if (-not $Node.PSObject.Properties['graph_attributes'] -or $null -eq $Node.graph_attributes) { continue }
            if (-not $Node.graph_attributes.PSObject.Properties['policy_actions']) { continue }

            $SeenIds = [System.Collections.Generic.HashSet[string]]::new()
            foreach ($PA in $Node.graph_attributes.policy_actions) {
                if ($PA.PSObject.Properties['policy_id']) { $Pid = $PA.policy_id } else { $Pid = $null }
                if (-not $Pid) {
                    $MissingPolicyId += [PSCustomObject]@{ NodeId = $Node.id; POV = $PovKey; Action = $PA.action }
                    continue
                }

                if (-not $SeenIds.Add($Pid)) {
                    $DuplicateRefs += [PSCustomObject]@{ NodeId = $Node.id; PolicyId = $Pid }
                }

                if (-not $PolicyRefs.ContainsKey($Pid)) {
                    $PolicyRefs[$Pid] = [System.Collections.Generic.List[string]]::new()
                    $ActualPovs[$Pid] = [System.Collections.Generic.HashSet[string]]::new()
                    $ActualCounts[$Pid] = 0
                }
                $PolicyRefs[$Pid].Add($Node.id)
                [void]$ActualPovs[$Pid].Add($PovKey)
                $ActualCounts[$Pid]++
            }
        }
    }

    # ── Check 1: Policy registry ──
    $Checks++
    $RegistryPath = Join-Path $TaxDir 'policy_actions.json'
    if (Test-Path $RegistryPath) {
        $Registry = Get-Content -Raw -Path $RegistryPath | ConvertFrom-Json
        $RegistryIds = [System.Collections.Generic.HashSet[string]]::new()
        foreach ($Pol in $Registry.policies) { [void]$RegistryIds.Add($Pol.id) }

        # Unresolved refs
        $Unresolved = @($PolicyRefs.Keys | Where-Object { -not $RegistryIds.Contains($_) })
        if ($Unresolved.Count -gt 0) {
            $Issues.Add([PSCustomObject]@{ Check = 'PolicyRef'; Severity = 'Error'; Count = $Unresolved.Count; Detail = "policy_id refs not in registry: $($Unresolved -join ', ')" })
        } else { $Passed++ }

        # Orphaned
        $Checks++
        $Orphaned = @($RegistryIds | Where-Object { -not $PolicyRefs.ContainsKey($_) })
        if ($Orphaned.Count -gt 0) {
            $Issues.Add([PSCustomObject]@{ Check = 'Orphaned'; Severity = 'Warning'; Count = $Orphaned.Count; Detail = "registry entries with no node refs: $($Orphaned[0..([Math]::Min(4, $Orphaned.Count-1))] -join ', ')$(if ($Orphaned.Count -gt 5) { ' ...' })" })
        } else { $Passed++ }

        # member_count accuracy
        $Checks++
        $CountMismatches = 0
        foreach ($Pol in $Registry.policies) {
            if ($ActualCounts.ContainsKey($Pol.id)) { $Actual = $ActualCounts[$Pol.id] } else { $Actual = 0 }
            if ($Pol.member_count -ne $Actual) { $CountMismatches++ }
        }
        if ($CountMismatches -gt 0) {
            $Issues.Add([PSCustomObject]@{ Check = 'MemberCount'; Severity = 'Warning'; Count = $CountMismatches; Detail = "$CountMismatches policies have inaccurate member_count" })
        } else { $Passed++ }
    }
    else {
        $Issues.Add([PSCustomObject]@{ Check = 'Registry'; Severity = 'Error'; Count = 1; Detail = 'policy_actions.json not found' })
    }

    # ── Check 2: Missing policy_id ──
    $Checks++
    if ($MissingPolicyId.Count -gt 0) {
        $Issues.Add([PSCustomObject]@{ Check = 'MissingPolicyId'; Severity = 'Warning'; Count = $MissingPolicyId.Count; Detail = "$($MissingPolicyId.Count) policy_actions without policy_id" })
    } else { $Passed++ }

    # ── Check 3: Duplicate refs ──
    $Checks++
    if ($DuplicateRefs.Count -gt 0) {
        $Issues.Add([PSCustomObject]@{ Check = 'DuplicateRef'; Severity = 'Warning'; Count = $DuplicateRefs.Count; Detail = "$($DuplicateRefs.Count) duplicate policy_id refs within nodes" })
    } else { $Passed++ }

    # ── Check 4: Edge integrity ──
    $Checks++
    $EdgesPath = Join-Path $TaxDir 'edges.json'
    $BadEdges = 0
    if (Test-Path $EdgesPath) {
        $EdgesData = Get-Content -Raw -Path $EdgesPath | ConvertFrom-Json
        $ValidIds = [System.Collections.Generic.HashSet[string]]::new($AllNodeIds)
        if ($Registry) { foreach ($Pol in $Registry.policies) { [void]$ValidIds.Add($Pol.id) } }

        foreach ($Edge in $EdgesData.edges) {
            if (-not $ValidIds.Contains($Edge.source) -or -not $ValidIds.Contains($Edge.target)) {
                $BadEdges++
            }
        }
    }
    if ($BadEdges -gt 0) {
        $Issues.Add([PSCustomObject]@{ Check = 'EdgeRef'; Severity = 'Error'; Count = $BadEdges; Detail = "$BadEdges edges reference non-existent nodes/policies" })
    } else { $Passed++ }

    # ── Check 5: Embedding coverage ──
    $Checks++
    $EmbPath = Join-Path $TaxDir 'embeddings.json'
    $MissingEmb = 0
    if (Test-Path $EmbPath) {
        $EmbData = Get-Content -Raw -Path $EmbPath | ConvertFrom-Json
        $EmbIds = [System.Collections.Generic.HashSet[string]]::new()
        foreach ($Prop in $EmbData.nodes.PSObject.Properties) { [void]$EmbIds.Add($Prop.Name) }

        foreach ($Nid in $AllNodeIds) {
            if (-not $EmbIds.Contains($Nid)) { $MissingEmb++ }
        }
        if ($Registry) {
            foreach ($Pol in $Registry.policies) {
                if (-not $EmbIds.Contains($Pol.id)) { $MissingEmb++ }
            }
        }
    }
    else {
        $MissingEmb = $AllNodeIds.Count
    }
    if ($MissingEmb -gt 0) {
        $Issues.Add([PSCustomObject]@{ Check = 'Embeddings'; Severity = 'Warning'; Count = $MissingEmb; Detail = "$MissingEmb nodes/policies missing embeddings" })
    } else { $Passed++ }

    # ── Check 6: Dangling children ──
    $Checks++
    $DanglingChildren = @()
    foreach ($PovKey in @('accelerationist', 'safetyist', 'skeptic')) {
        if (-not $LoadedFiles.ContainsKey($PovKey)) { continue }
        foreach ($Node in $LoadedFiles[$PovKey].Data.nodes) {
            if (-not $Node.PSObject.Properties['children'] -or $null -eq $Node.children) { continue }
            foreach ($ChildId in @($Node.children)) {
                if (-not $PovNodeIds.Contains($ChildId)) {
                    $DanglingChildren += [PSCustomObject]@{ NodeId = $Node.id; ChildId = $ChildId; POV = $PovKey }
                }
            }
        }
    }
    if ($DanglingChildren.Count -gt 0) {
        $Detail = ($DanglingChildren | ForEach-Object { "$($_.NodeId) -> $($_.ChildId)" }) -join '; '
        $Issues.Add([PSCustomObject]@{ Check = 'DanglingChild'; Severity = 'Error'; Count = $DanglingChildren.Count; Detail = "children ref non-existent nodes: $Detail" })
    } else { $Passed++ }

    # ── Check 7: Dangling parent_id ──
    $Checks++
    $DanglingParents = @()
    foreach ($PovKey in @('accelerationist', 'safetyist', 'skeptic')) {
        if (-not $LoadedFiles.ContainsKey($PovKey)) { continue }
        foreach ($Node in $LoadedFiles[$PovKey].Data.nodes) {
            if ($Node.parent_id -and -not $PovNodeIds.Contains($Node.parent_id)) {
                $DanglingParents += [PSCustomObject]@{ NodeId = $Node.id; ParentId = $Node.parent_id; POV = $PovKey }
            }
        }
    }
    if ($DanglingParents.Count -gt 0) {
        $Detail = ($DanglingParents | ForEach-Object { "$($_.NodeId) -> $($_.ParentId)" }) -join '; '
        $Issues.Add([PSCustomObject]@{ Check = 'DanglingParent'; Severity = 'Error'; Count = $DanglingParents.Count; Detail = "parent_id refs non-existent nodes: $Detail" })
    } else { $Passed++ }

    # ── Check 8: Dangling situation_refs ──
    $Checks++
    $SitIds = [System.Collections.Generic.HashSet[string]]::new()
    if ($LoadedFiles.ContainsKey('situations')) {
        foreach ($N in $LoadedFiles['situations'].Data.nodes) { [void]$SitIds.Add($N.id) }
    }
    $DanglingSitRefs = @()
    foreach ($PovKey in @('accelerationist', 'safetyist', 'skeptic')) {
        if (-not $LoadedFiles.ContainsKey($PovKey)) { continue }
        foreach ($Node in $LoadedFiles[$PovKey].Data.nodes) {
            if (-not $Node.PSObject.Properties['situation_refs'] -or $null -eq $Node.situation_refs) { continue }
            foreach ($Ref in @($Node.situation_refs)) {
                if (-not $SitIds.Contains($Ref)) {
                    $DanglingSitRefs += [PSCustomObject]@{ NodeId = $Node.id; SitRef = $Ref; POV = $PovKey }
                }
            }
        }
    }
    if ($DanglingSitRefs.Count -gt 0) {
        $Detail = ($DanglingSitRefs | ForEach-Object { "$($_.NodeId) -> $($_.SitRef)" }) -join '; '
        $Issues.Add([PSCustomObject]@{ Check = 'DanglingSitRef'; Severity = 'Error'; Count = $DanglingSitRefs.Count; Detail = "situation_refs non-existent nodes: $Detail" })
    } else { $Passed++ }

    # ── Check 9: Dangling linked_nodes in situations ──
    $Checks++
    $DanglingLinked = @()
    if ($LoadedFiles.ContainsKey('situations')) {
        foreach ($Node in $LoadedFiles['situations'].Data.nodes) {
            if (-not $Node.PSObject.Properties['linked_nodes'] -or $null -eq $Node.linked_nodes) { continue }
            foreach ($Linked in @($Node.linked_nodes)) {
                if (-not $AllNodeIds.Contains($Linked)) {
                    $DanglingLinked += [PSCustomObject]@{ NodeId = $Node.id; LinkedId = $Linked }
                }
            }
        }
    }
    if ($DanglingLinked.Count -gt 0) {
        $Detail = ($DanglingLinked | ForEach-Object { "$($_.NodeId) -> $($_.LinkedId)" }) -join '; '
        $Issues.Add([PSCustomObject]@{ Check = 'DanglingLinked'; Severity = 'Warning'; Count = $DanglingLinked.Count; Detail = "linked_nodes ref non-existent nodes: $Detail" })
    } else { $Passed++ }

    # ── Repair ──
    if ($Repair -and $Issues.Count -gt 0) {
        $Repaired = 0
        Write-Host ''
        Write-Host ' Repairing...' -ForegroundColor Cyan

        # Fix dangling children
        foreach ($DC in $DanglingChildren) {
            $Node = $LoadedFiles[$DC.POV].Data.nodes | Where-Object { $_.id -eq $DC.NodeId }
            $Node.children = @($Node.children | Where-Object { $_ -ne $DC.ChildId })
            $Dirty[$DC.POV] = $true
            $Repaired++
            Write-Host " Removed child '$($DC.ChildId)' from $($DC.NodeId)" -ForegroundColor Yellow
        }

        # Fix dangling parent_id
        foreach ($DP in $DanglingParents) {
            $Node = $LoadedFiles[$DP.POV].Data.nodes | Where-Object { $_.id -eq $DP.NodeId }
            $Node.parent_id = $null
            $Dirty[$DP.POV] = $true
            $Repaired++
            Write-Host " Cleared parent_id '$($DP.ParentId)' from $($DP.NodeId)" -ForegroundColor Yellow
        }

        # Fix dangling situation_refs
        foreach ($DS in $DanglingSitRefs) {
            $Node = $LoadedFiles[$DS.POV].Data.nodes | Where-Object { $_.id -eq $DS.NodeId }
            $Node.situation_refs = @($Node.situation_refs | Where-Object { $_ -ne $DS.SitRef })
            $Dirty[$DS.POV] = $true
            $Repaired++
            Write-Host " Removed situation_ref '$($DS.SitRef)' from $($DS.NodeId)" -ForegroundColor Yellow
        }

        # Fix dangling linked_nodes in situations
        foreach ($DL in $DanglingLinked) {
            $Node = $LoadedFiles['situations'].Data.nodes | Where-Object { $_.id -eq $DL.NodeId }
            $Node.linked_nodes = @($Node.linked_nodes | Where-Object { $_ -ne $DL.LinkedId })
            $Dirty['situations'] = $true
            $Repaired++
            Write-Host " Removed linked_node '$($DL.LinkedId)' from $($DL.NodeId)" -ForegroundColor Yellow
        }

        # Fix dangling edges
        $EdgesPath = Join-Path $TaxDir 'edges.json'
        if ($BadEdges -gt 0 -and (Test-Path $EdgesPath)) {
            $EdgesData = Get-Content -Raw -Path $EdgesPath | ConvertFrom-Json
            $ValidIds = [System.Collections.Generic.HashSet[string]]::new($AllNodeIds)
            if ($Registry) { foreach ($Pol in $Registry.policies) { [void]$ValidIds.Add($Pol.id) } }
            $OrigCount = $EdgesData.edges.Count
            $EdgesData.edges = @($EdgesData.edges | Where-Object { $ValidIds.Contains($_.source) -and $ValidIds.Contains($_.target) })
            $Removed = $OrigCount - $EdgesData.edges.Count
            if ($Removed -gt 0) {
                ($EdgesData | ConvertTo-Json -Depth 20) -replace "`r`n", "`n" | Set-Content -Path $EdgesPath -Encoding UTF8 -NoNewline
                $Repaired += $Removed
                Write-Host " Removed $Removed dangling edges" -ForegroundColor Yellow
            }
        }

        # Save modified files
        foreach ($PovKey in $Dirty.Keys) {
            $Entry = $LoadedFiles[$PovKey]
            ($Entry.Data | ConvertTo-Json -Depth 20) -replace "`r`n", "`n" | Set-Content -Path $Entry.Path -Encoding UTF8 -NoNewline
            Write-Host " Saved $($Entry.Path)" -ForegroundColor Green
        }

        Write-Host " Repaired $Repaired issue(s)." -ForegroundColor Green
    }

    # ── Report ──
    Write-Host ''
    Write-Host '=== Taxonomy Integrity Check ===' -ForegroundColor Cyan
    Write-Host " Nodes: $($AllNodeIds.Count)" -ForegroundColor White
    Write-Host " Policies: $(if ($Registry) { $Registry.policies.Count } else { '?' })" -ForegroundColor White
    Write-Host " Checks: $Checks" -ForegroundColor White
    Write-Host " Passed: $Passed" -ForegroundColor Green
    Write-Host " Issues: $($Issues.Count)" -ForegroundColor $(if ($Issues.Count -gt 0) { 'Yellow' } else { 'Green' })

    if ($Issues.Count -gt 0) {
        Write-Host ''
        foreach ($Issue in $Issues) {
            if ($Issue.Severity -eq 'Error') { $Color = 'Red' } else { $Color = 'Yellow' }
            Write-Host " [$($Issue.Severity)] $($Issue.Check): $($Issue.Detail)" -ForegroundColor $Color
        }
    }
    else {
        Write-Host ''
        Write-Host ' All checks passed!' -ForegroundColor Green
    }
    Write-Host ''

    if ($PassThru) {
        [PSCustomObject]@{
            Nodes     = $AllNodeIds.Count
            Policies  = if ($Registry) { $Registry.policies.Count } else { 0 }
            Checks    = $Checks
            Passed    = $Passed
            Issues    = $Issues.Count
            Details   = @($Issues)
        }
    }
}