Public/Repair-AITSummaryMappings.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

<#
.SYNOPSIS
    Backfills null and stale taxonomy_node_id values in summary key points
    using embedding cosine similarity against the current taxonomy.
.DESCRIPTION
    Scans all summary files for key points where taxonomy_node_id is null
    (never mapped) or points to a node that no longer exists (stale). Uses the
    all-MiniLM-L6-v2 embedding model to find the best-matching taxonomy node for
    each key point's text.

    Optionally re-evaluates existing mappings to redistribute references away
    from gravity-well nodes toward more specific matches.

    Does NOT re-run the full LLM pipeline — only fixes the node pointer.

    The work itself is delegated to scripts/backfill_taxonomy_mappings.py under
    the repository root; this function locates Python, builds the command line,
    streams the script's stderr to the host, and enforces a 10 minute timeout.
.PARAMETER DryRun
    Passes --dry-run to the backfill script. When the script prints JSON on
    stdout, a short report is displayed and the parsed object is returned.
.PARAMETER Threshold
    Value passed to the script as --threshold (0.2 to 0.8, default 0.35).
.PARAMETER ReEvaluate
    Passes --re-evaluate to the script, together with --re-evaluate-margin.
.PARAMETER Margin
    Value passed as --re-evaluate-margin when -ReEvaluate is set
    (0.01 to 0.5, default 0.08).
.PARAMETER UpdateEmbeddings
    Runs "scripts/embed_taxonomy.py generate" before the backfill.
.OUTPUTS
    With -DryRun: the parsed JSON report, or the raw stdout text when it cannot
    be parsed or displayed. Otherwise nothing is written to the pipeline.
.EXAMPLE
    Repair-AITSummaryMappings -DryRun
    # Preview what would change without modifying files
.EXAMPLE
    Repair-AITSummaryMappings
    # Fix null and stale mappings (default threshold 0.35)
.EXAMPLE
    Repair-AITSummaryMappings -ReEvaluate -Margin 0.10
    # Also reassign existing mappings where a better match exists (10%+ improvement)
.EXAMPLE
    Repair-AITSummaryMappings -UpdateEmbeddings
    # Regenerate embeddings first, then backfill
#>
function Repair-AITSummaryMappings {
    [CmdletBinding(SupportsShouldProcess)]
    param(
        [Parameter()]
        [switch]$DryRun,

        [Parameter()]
        [ValidateRange(0.2, 0.8)]
        [double]$Threshold = 0.35,

        [Parameter()]
        [switch]$ReEvaluate,

        [Parameter()]
        [ValidateRange(0.01, 0.5)]
        [double]$Margin = 0.08,

        [Parameter()]
        [switch]$UpdateEmbeddings
    )

    Set-StrictMode -Version Latest

    $RepoRoot       = Get-CodeRoot
    $BackfillScript = Join-Path $RepoRoot 'scripts' 'backfill_taxonomy_mappings.py'
    $EmbedScript    = Join-Path $RepoRoot 'scripts' 'embed_taxonomy.py'

    if (-not (Test-Path $BackfillScript)) {
        throw "Backfill script not found at: $BackfillScript"
    }

    # Check Python. Restrict to real executables: an alias or function named
    # 'python' has no usable .Source path for Process.Start.
    $Python = Get-Command python -CommandType Application -ErrorAction SilentlyContinue |
        Select-Object -First 1
    if (-not $Python) {
        $Python = Get-Command python3 -CommandType Application -ErrorAction SilentlyContinue |
            Select-Object -First 1
    }
    if (-not $Python) {
        throw "Python is required. Install Python 3.10+ with sentence-transformers: pip install sentence-transformers"
    }

    # Optionally regenerate embeddings. This rewrites data, so it is gated by
    # ShouldProcess to keep -WhatIf / -Confirm honest.
    if ($UpdateEmbeddings) {
        if (-not (Test-Path $EmbedScript)) {
            throw "Embedding script not found at: $EmbedScript"
        }
        if ($PSCmdlet.ShouldProcess('taxonomy embeddings', 'Regenerate embeddings')) {
            Write-Host "Regenerating embeddings..." -ForegroundColor Yellow
            $EmbResult = & $Python.Source $EmbedScript generate 2>&1
            $EmbExitCode = $LASTEXITCODE
            foreach ($Line in $EmbResult) {
                Write-Host " $Line" -ForegroundColor DarkGray
            }
            Write-Host ""
            if ($EmbExitCode -ne 0) {
                throw "Embedding generation failed with exit code $EmbExitCode."
            }
        }
    }

    # Build arguments. Numbers are formatted with the invariant culture so that
    # a comma-decimal locale does not hand "0,35" to the Python argument parser.
    $Invariant = [System.Globalization.CultureInfo]::InvariantCulture
    $PyArgs = [System.Collections.Generic.List[string]]::new()
    $PyArgs.Add($BackfillScript)
    $PyArgs.Add('--threshold')
    $PyArgs.Add($Threshold.ToString($Invariant))
    if ($DryRun) {
        $PyArgs.Add('--dry-run')
    }
    if ($ReEvaluate) {
        $PyArgs.Add('--re-evaluate')
        $PyArgs.Add('--re-evaluate-margin')
        $PyArgs.Add($Margin.ToString($Invariant))
    }

    $Action = if ($DryRun) { "Preview backfill" } else { "Backfill summary mappings" }
    if (-not $PSCmdlet.ShouldProcess("all summaries", $Action)) {
        return
    }

    # Run
    Write-Host "Running backfill..." -ForegroundColor Yellow

    $TimeoutMs = 600000   # 10 minutes, matching the timeout message below

    $Psi = [System.Diagnostics.ProcessStartInfo]::new()
    $Psi.FileName = $Python.Source
    # ArgumentList lets the runtime do the quoting/escaping of each argument.
    foreach ($Arg in $PyArgs) {
        $Psi.ArgumentList.Add($Arg)
    }
    $Psi.WorkingDirectory       = $RepoRoot
    $Psi.RedirectStandardOutput = $true
    $Psi.RedirectStandardError  = $true
    $Psi.UseShellExecute        = $false
    $Psi.CreateNoWindow         = $true

    try {
        $Proc = [System.Diagnostics.Process]::Start($Psi)
    }
    catch {
        throw "Failed to start backfill process: $_"
    }

    try {
        $Timer = [System.Diagnostics.Stopwatch]::StartNew()

        # Drain stdout in the background while stderr is streamed. Reading the
        # two pipes one after the other can deadlock once the child fills the
        # pipe that is not currently being read.
        $StdOutTask = $Proc.StandardOutput.ReadToEndAsync()

        $TimedOut = $false
        while ($true) {
            $ErrTask = $Proc.StandardError.ReadLineAsync()
            $RemainingMs = [int][Math]::Max(0, $TimeoutMs - $Timer.ElapsedMilliseconds)
            if (-not $ErrTask.Wait($RemainingMs)) {
                $TimedOut = $true
                break
            }
            $Line = $ErrTask.Result
            if ($null -eq $Line) {
                break   # end of stderr
            }
            if ($Line) {
                Write-Host $Line -ForegroundColor DarkGray
            }
        }

        if (-not $TimedOut) {
            $RemainingMs = [int][Math]::Max(0, $TimeoutMs - $Timer.ElapsedMilliseconds)
            if (-not $Proc.WaitForExit($RemainingMs)) {
                $TimedOut = $true
            }
        }

        if ($TimedOut) {
            # Best effort: the process may already have exited on its own.
            try { $Proc.Kill() } catch { }
            throw "Backfill timed out after 10 minutes."
        }

        $StdOutText = $StdOutTask.GetAwaiter().GetResult()
        $ExitCode   = $Proc.ExitCode
    }
    finally {
        $Proc.Dispose()
    }

    if ($ExitCode -ne 0) {
        throw "Backfill failed with exit code $ExitCode."
    }

    if ($DryRun -and $StdOutText) {
        try {
            $Report = $StdOutText | ConvertFrom-Json
            Write-Host "`nDry run report:" -ForegroundColor Cyan
            Write-Host " Null → mapped: $($Report.null_fixed)" -ForegroundColor Green
            Write-Host " Stale → remapped: $($Report.stale_fixed)" -ForegroundColor Green
            Write-Host " Re-evaluated: $($Report.reassigned)" -ForegroundColor Yellow
            Write-Host " No match found: $($Report.no_match)" -ForegroundColor DarkGray
            return $Report
        }
        catch {
            # Not JSON, or a report field is missing under strict mode:
            # hand back the raw text instead.
            return $StdOutText
        }
    }

    if ($StdOutText) {
        # Surface the script's stdout on the verbose stream only, so nothing
        # new is written to the pipeline.
        Write-Verbose $StdOutText
    }

    Write-Host "`nBackfill complete." -ForegroundColor Green
}