VBAF.ML.FeatureEngineering.ps1
|
#Requires -Version 5.1
<#
.SYNOPSIS
    Feature Engineering - Create Better Features for ML
.DESCRIPTION
    Implements feature engineering from scratch.
    Designed as a TEACHING resource - every step explained.
    Features included:
      - Polynomial features : x, x^2, x^3, x1*x2 combinations
      - Interaction terms   : explicit pairwise feature products
      - Feature binning     : continuous -> discrete buckets
      - Feature selection   : variance, correlation, mutual info
      - PCA                 : dimensionality reduction
      - Pipeline            : chain transformers in sequence
    Standalone - no external VBAF dependencies required.
.NOTES
    Part of VBAF - Phase 5 Feature Engineering Module
    PS 5.1 compatible
    Teaching project - why and how of each transformation!
#>

# Directory this script was loaded from ($PSScriptRoot automatic variable).
# NOTE(review): not referenced again in the visible part of this file -
# presumably consumed by other VBAF scripts; confirm before removing.
$basePath = $PSScriptRoot

# ============================================================
# TEACHING NOTE: What is Feature Engineering?
# "Better features = better models" - this is often MORE
# important than choosing the right algorithm!
#
# Raw features are what you measure. Engineered features
# capture RELATIONSHIPS and PATTERNS that algorithms can't
# discover on their own.
#
# Example: predicting house price
#   Raw       : [size, floors]
#   Engineered: [size, floors, size^2, size*floors, size_per_floor]
#   The model can now find non-linear relationships!
# ============================================================

# ============================================================
# POLYNOMIAL FEATURES
# ============================================================
# TEACHING NOTE: Linear models can only find straight-line
# relationships. Polynomial features let linear models fit CURVES!
#
# For features [x1, x2] with degree=2:
#   Output: [1, x1, x2, x1^2, x1*x2, x2^2]
#   The model learns: y = a + b*x1 + c*x2 + d*x1^2 + e*x1*x2 + f*x2^2
#
# WARNING: degree=3 with 10 features -> 286 columns!
# More features = more risk of overfitting!
# ============================================================
# Expands a feature matrix with polynomial / interaction columns.
# Output columns are ordered by degree (optional bias, then the
# original features, then degree 2, degree 3, ...); inside one
# degree the index tuples are in lexicographic order.
class PolynomialFeatures {
    [int]      $Degree
    [bool]     $IncludeBias        # include column of 1s
    [bool]     $InteractionOnly    # only x1*x2, skip x1^2
    [int]      $NInputFeatures
    [int]      $NOutputFeatures
    [bool]     $IsFitted = $false
    [string[]] $FeatureNames

    # Default configuration: degree 2, no bias column, powers allowed.
    PolynomialFeatures() {
        $this.Degree          = 2
        $this.IncludeBias     = $false
        $this.InteractionOnly = $false
    }

    # degree: highest total degree of the generated terms.
    PolynomialFeatures([int]$degree) {
        $this.Degree          = $degree
        $this.IncludeBias     = $false
        $this.InteractionOnly = $false
    }

    # interactionOnly: when $true, only products of DISTINCT features are
    # generated (no x^2, x^3, ...).
    PolynomialFeatures([int]$degree, [bool]$interactionOnly) {
        $this.Degree          = $degree
        $this.IncludeBias     = $false
        $this.InteractionOnly = $interactionOnly
    }

    # Generate all combinations of feature indices up to the given degree.
    # Returns an ArrayList with one index array per output column; the
    # column value is the product of the listed input features (an empty
    # array stands for the bias column of 1s).
    hidden [System.Collections.ArrayList] GetCombinations([int]$nFeatures) {
        $combos = [System.Collections.ArrayList]::new()
        if ($this.IncludeBias) { $combos.Add(@()) | Out-Null }
        if ($nFeatures -le 0) { return $combos }

        # Degree 1: original features
        for ($i = 0; $i -lt $nFeatures; $i++) {
            $combos.Add(@($i)) | Out-Null
        }

        # Degree 2+: each degree d gets its own d-element index tuples so
        # that Degree=3 really yields cubic terms (x^3, x^2*y, ...) and not
        # a second copy of the pairwise terms.
        for ($d = 2; $d -le $this.Degree; $d++) {
            # Products of d DISTINCT features need at least d features.
            if ($this.InteractionOnly -and $d -gt $nFeatures) { break }

            # First tuple: (0,0,...,0) with repetition, (0,1,...,d-1) without.
            $idx = [int[]]::new($d)
            if ($this.InteractionOnly) {
                for ($p = 0; $p -lt $d; $p++) { $idx[$p] = $p }
            }

            while ($true) {
                $combos.Add($idx.Clone()) | Out-Null

                # Find the right-most position that can still be advanced.
                $p = $d - 1
                while ($p -ge 0) {
                    $limit = if ($this.InteractionOnly) { $nFeatures - $d + $p } else { $nFeatures - 1 }
                    if ($idx[$p] -lt $limit) { break }
                    $p--
                }
                if ($p -lt 0) { break }   # every tuple of this degree emitted

                # Advance it and reset everything to its right to the
                # smallest values that keep the tuple (strictly) ascending.
                $idx[$p]++
                for ($q = $p + 1; $q -lt $d; $q++) {
                    $idx[$q] = if ($this.InteractionOnly) { $idx[$q - 1] + 1 } else { $idx[$p] }
                }
            }
        }
        return $combos
    }

    # Records the input width, the output width and a readable name for
    # every output column (e.g. "size^2", "size*age", "1" for the bias).
    # featureNames: names of the input columns; a missing name falls back
    # to "f<index>".
    [void] Fit([double[][]]$X, [string[]]$featureNames) {
        $this.NInputFeatures  = $X[0].Length
        $combos               = $this.GetCombinations($this.NInputFeatures)
        $this.NOutputFeatures = $combos.Count

        # Build feature names
        $names = [System.Collections.ArrayList]::new()
        foreach ($combo in $combos) {
            if ($combo.Length -eq 0) { $names.Add("1") | Out-Null; continue }

            $parts = @()
            $k = 0
            while ($k -lt $combo.Length) {
                $fi  = $combo[$k]
                # Tuples are ascending, so equal indices are adjacent:
                # the run length is the exponent of that feature.
                $exp = 1
                while (($k + $exp) -lt $combo.Length -and $combo[$k + $exp] -eq $fi) { $exp++ }

                $fn = if ($fi -lt $featureNames.Length) { $featureNames[$fi] } else { "f$fi" }
                $parts += if ($exp -gt 1) { "${fn}^$exp" } else { $fn }
                $k += $exp
            }
            $names.Add($parts -join "*") | Out-Null
        }
        $this.FeatureNames = $names.ToArray()
        $this.IsFitted     = $true
    }

    # Fit with generated input names x0, x1, ...
    [void] Fit([double[][]]$X) {
        $names = @(); for ($i = 0; $i -lt $X[0].Length; $i++) { $names += "x$i" }
        $this.Fit($X, $names)
    }

    # Returns a new matrix with one row per input row and one column per
    # generated term. The width is derived from the rows passed in, so the
    # method also works on an instance that has not been fitted.
    [double[][]] Transform([double[][]]$X) {
        $combos = $this.GetCombinations($X[0].Length)
        $nOut   = $combos.Count
        $result = [double[][]]::new($X.Length)
        for ($r = 0; $r -lt $X.Length; $r++) {
            $row    = $X[$r]
            $newRow = [double[]]::new($nOut)
            for ($c = 0; $c -lt $nOut; $c++) {
                $combo = $combos[$c]
                if ($combo.Length -eq 0) { $newRow[$c] = 1.0; continue }
                $val = 1.0
                foreach ($fi in $combo) { $val *= $row[$fi] }
                $newRow[$c] = $val
            }
            $result[$r] = $newRow
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    [double[][]] FitTransform([double[][]]$X, [string[]]$featureNames) {
        $this.Fit($X, $featureNames)
        return $this.Transform($X)
    }

    # Prints the configuration and the generated column names to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Polynomial Features ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Degree : {0,-18}║" -f $this.Degree) -ForegroundColor Yellow
        Write-Host ("║ Interaction only: {0,-18}║" -f $this.InteractionOnly) -ForegroundColor Yellow
        Write-Host ("║ Input features : {0,-18}║" -f $this.NInputFeatures) -ForegroundColor White
        Write-Host ("║ Output features : {0,-18}║" -f $this.NOutputFeatures) -ForegroundColor Green
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        foreach ($name in $this.FeatureNames) {
            Write-Host ("║ {0,-36}║" -f $name) -ForegroundColor White
        }
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# INTERACTION TERMS (explicit, readable)
#
# ============================================================
# TEACHING NOTE: Interactions capture COMBINED effects.
#   e.g. size*age: big old house behaves differently than
#   big new house or small old house.
#   More interpretable than full polynomial expansion!
# ============================================================
# Appends every pairwise product x_i * x_j (i < j) to the original
# feature columns.
class InteractionFeatures {
    [string[]] $FeatureNames          # output column names, set by FitTransform
    [bool]     $IsFitted = $false

    InteractionFeatures() {}

    # Returns each row followed by its pairwise products, in the order
    # (0,1), (0,2), ..., (1,2), ... The column count is taken from the
    # first row.
    [double[][]] Transform([double[][]]$X) {
        $nF      = [int]($X[0].Length)
        $nPairs  = [int]($nF * ($nF - 1) / 2)
        $result  = [double[][]]::new($X.Length)
        for ($r = 0; $r -lt $X.Length; $r++) {
            $row    = $X[$r]
            $newRow = [double[]]::new($nF + $nPairs)
            for ($k = 0; $k -lt $nF; $k++) { $newRow[$k] = $row[$k] }
            $pos = $nF
            for ($i = 0; $i -lt $nF; $i++) {
                for ($j = $i + 1; $j -lt $nF; $j++) {
                    $newRow[$pos] = $row[$i] * $row[$j]
                    $pos++
                }
            }
            $result[$r] = $newRow
        }
        return $result
    }

    # Add all pairwise products to feature matrix and record the output
    # column names ("a", "b", ..., "a*b", ...). An input column without a
    # supplied name is called "f<index>" so no output name is left blank.
    [double[][]] FitTransform([double[][]]$X, [string[]]$featureNames) {
        $nF = [int]($X[0].Length)

        # Resolve one name per input column
        $baseNames = [string[]]::new($nF)
        for ($i = 0; $i -lt $nF; $i++) {
            $baseNames[$i] = if ($i -lt $featureNames.Length) { $featureNames[$i] } else { "f$i" }
        }

        # Build output feature names
        $allNames = [System.Collections.ArrayList]::new()
        foreach ($n2 in $baseNames) { $allNames.Add($n2) | Out-Null }
        for ($i = 0; $i -lt $nF; $i++) {
            for ($j = $i + 1; $j -lt $nF; $j++) {
                $allNames.Add("$($baseNames[$i])*$($baseNames[$j])") | Out-Null
            }
        }
        $this.FeatureNames = $allNames.ToArray()
        $this.IsFitted     = $true

        return $this.Transform($X)
    }

    # Single-argument form with generated names (f0, f1, ...); this is the
    # signature TransformerPipeline invokes on its steps.
    [double[][]] FitTransform([double[][]]$X) {
        return $this.FitTransform($X, [string[]]@())
    }

    # Prints the output column names; product columns are highlighted.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "🔗 Interaction Features:" -ForegroundColor Green
        foreach ($name in $this.FeatureNames) {
            $color = if ($name -match '\*') { "Yellow" } else { "White" }
            Write-Host (" {0}" -f $name) -ForegroundColor $color
        }
        Write-Host ""
    }
}

# ============================================================
# FEATURE BINNING
# ============================================================
# TEACHING NOTE: Binning converts continuous numbers to categories.
# Why bin?
#   - Makes non-linear patterns easier to learn
#   - Reduces sensitivity to small measurement errors
#   - "Age 25-35" might matter more than exact age
#
# Two strategies:
#   Uniform  : equal-width bins (e.g. 0-10, 10-20, 20-30)
#   Quantile : equal-frequency bins (same number of points each)
#              More robust when data is skewed!
# ============================================================
# Replaces every value by the index (0 .. NBins-1) of the bin it falls in.
# Bins are half-open [edge_k, edge_k+1), which is how PrintBins labels
# them; the last bin additionally contains the upper edge. Values outside
# the fitted range are clipped into the first / last bin.
class FeatureBinner {
    [string]     $Strategy    # "uniform" or "quantile"
    [int]        $NBins
    [double[][]] $BinEdges    # one array of edges per feature
    [bool]       $IsFitted = $false

    # nBins: number of bins per feature (>= 1). Strategy defaults to "quantile".
    FeatureBinner([int]$nBins) {
        $this.Init($nBins, "quantile")
    }

    # strategy: "uniform" for equal-width bins; any other value selects
    # the quantile strategy.
    FeatureBinner([int]$nBins, [string]$strategy) {
        $this.Init($nBins, $strategy)
    }

    # Shared constructor logic. Both strategies divide by NBins, so a
    # non-positive bin count cannot produce usable edges.
    hidden [void] Init([int]$nBins, [string]$strategy) {
        if ($nBins -lt 1) {
            throw [System.ArgumentOutOfRangeException]::new("nBins", "nBins must be at least 1.")
        }
        $this.NBins    = $nBins
        $this.Strategy = $strategy
    }

    # Linearly interpolated percentile (p in 0..100) of an ascending array.
    hidden [double] Percentile([double[]]$sorted, [double]$p) {
        $idx = $p / 100.0 * ($sorted.Length - 1)
        $lo  = [int][Math]::Floor($idx)
        $hi  = [int][Math]::Ceiling($idx)
        if ($lo -eq $hi) { return $sorted[$lo] }
        return $sorted[$lo] + ($idx - $lo) * ($sorted[$hi] - $sorted[$lo])
    }

    # Computes NBins + 1 edges per feature column of X.
    [void] Fit([double[][]]$X) {
        $nFeatures = [int]($X[0].Length)
        $allEdges  = [double[][]]::new($nFeatures)

        for ($f = 0; $f -lt $nFeatures; $f++) {
            # Copy the column into a typed array so a single-row matrix is
            # handled the same way as any other, then sort ascending.
            $vals = [double[]]::new($X.Length)
            for ($r = 0; $r -lt $X.Length; $r++) { $vals[$r] = $X[$r][$f] }
            [Array]::Sort($vals)

            $edges = [double[]]::new($this.NBins + 1)
            if ($this.Strategy -eq "uniform") {
                $minV = $vals[0]
                $maxV = $vals[$vals.Length - 1]
                $step = ($maxV - $minV) / $this.NBins
                for ($b = 0; $b -le $this.NBins; $b++) {
                    $edges[$b] = $minV + $b * $step
                }
            } else {
                # Quantile edges
                for ($b = 0; $b -le $this.NBins; $b++) {
                    $p = $b * 100.0 / $this.NBins
                    $edges[$b] = $this.Percentile($vals, $p)
                }
            }
            $allEdges[$f] = $edges
        }
        $this.BinEdges = $allEdges
        $this.IsFitted = $true
    }

    # Maps every value to its bin index using the edges learned by Fit.
    # Throws InvalidOperationException when called before Fit.
    [double[][]] Transform([double[][]]$X) {
        if (-not $this.IsFitted) {
            throw [System.InvalidOperationException]::new("FeatureBinner must be fitted before Transform is called.")
        }
        $result = [double[][]]::new($X.Length)
        for ($r = 0; $r -lt $X.Length; $r++) {
            $row    = $X[$r]
            $binned = [double[]]::new($row.Length)
            for ($f = 0; $f -lt $row.Length; $f++) {
                $edges  = $this.BinEdges[$f]
                $binIdx = $this.NBins - 1    # default to last bin
                # Only interior edges decide the bin: a value strictly below
                # edge b belongs to bin b-1. The outer edges are skipped so
                # that out-of-range values land in the first / last bin and
                # the maximum stays inside the last bin.
                for ($b = 1; $b -lt $edges.Length - 1; $b++) {
                    if ($row[$f] -lt $edges[$b]) { $binIdx = $b - 1; break }
                }
                $binned[$f] = $binIdx
            }
            $result[$r] = $binned
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    # Prints the bin intervals of every feature to the host.
    # featureNames: display names; a missing name falls back to "f<index>".
    [void] PrintBins([string[]]$featureNames) {
        Write-Host ""
        Write-Host "🗂️ Feature Bins ($($this.Strategy), k=$($this.NBins)):" -ForegroundColor Green
        for ($f = 0; $f -lt $this.BinEdges.Length; $f++) {
            $name  = if ($f -lt $featureNames.Length) { $featureNames[$f] } else { "f$f" }
            $edges = $this.BinEdges[$f]
            Write-Host (" {0,-14}:" -f $name) -ForegroundColor Cyan -NoNewline
            for ($b = 0; $b -lt $this.NBins; $b++) {
                Write-Host (" [{0:F1},{1:F1})" -f $edges[$b], $edges[$b+1]) -ForegroundColor White -NoNewline
            }
            Write-Host ""
        }
        Write-Host ""
    }
}

# ============================================================
# FEATURE SELECTION
# ============================================================
# TEACHING NOTE: More features is NOT always better!
# Irrelevant features add noise and slow learning.
#
# Three simple selection methods:
#   Variance threshold : remove features with low variance
#     (if a feature is nearly constant, it carries no info!)
#   Correlation filter : remove features highly correlated
#     with each other (they carry the same info - redundant!)
#   Mutual Information : how much does each feature tell us
#     about the target? Higher = more useful.
# ============================================================
# Keeps only the feature columns whose population variance (divisor = row
# count) is greater than or equal to Threshold.
class VarianceSelector {
    [double] $Threshold
    [bool[]] $SelectedMask       # per input column: $true when kept
    [int[]]  $SelectedIndices    # indices of the kept columns, ascending
    [bool]   $IsFitted = $false

    VarianceSelector([double]$threshold) {
        $this.Threshold = $threshold
    }

    # Measures the variance of every column of X and records which
    # columns pass the threshold.
    [void] Fit([double[][]]$X) {
        $columnCount       = $X[0].Length
        $keptColumns       = [System.Collections.ArrayList]::new()
        $this.SelectedMask = @($false) * $columnCount

        $col = 0
        while ($col -lt $columnCount) {
            $column  = foreach ($sample in $X) { $sample[$col] }
            $average = ($column | Measure-Object -Average).Average

            $squaredDeviation = 0.0
            foreach ($value in $column) {
                $delta = $value - $average
                $squaredDeviation += $delta * $delta
            }

            $passes = ($squaredDeviation / $column.Count) -ge $this.Threshold
            if ($passes) {
                $this.SelectedMask[$col] = $true
                [void]$keptColumns.Add($col)
            }
            $col++
        }

        $this.SelectedIndices = $keptColumns.ToArray()
        $this.IsFitted        = $true
    }

    # Returns X reduced to the columns selected by Fit (same row order).
    [double[][]] Transform([double[][]]$X) {
        $width = $this.SelectedIndices.Length
        $rows  = [System.Collections.ArrayList]::new()
        foreach ($sample in $X) {
            $picked = @(0.0) * $width
            $slot   = 0
            foreach ($sourceIndex in $this.SelectedIndices) {
                $picked[$slot] = $sample[$sourceIndex]
                $slot++
            }
            [void]$rows.Add($picked)
        }
        return $rows.ToArray()
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    # Lists every input feature with a kept / dropped marker.
    # featureNames: display names; a missing name falls back to "f<index>".
    [void] PrintSummary([string[]]$featureNames) {
        Write-Host ""
        Write-Host "🎯 Variance Feature Selection (threshold=$($this.Threshold)):" -ForegroundColor Green
        $total = $this.SelectedMask.Length
        for ($f = 0; $f -lt $total; $f++) {
            $name = "f$f"
            if ($f -lt $featureNames.Length) { $name = $featureNames[$f] }

            if ($this.SelectedMask[$f]) {
                $icon  = "✅"
                $color = "White"
            } else {
                $icon  = "❌"
                $color = "DarkGray"
            }
            Write-Host (" $icon {0,-20}" -f $name) -ForegroundColor $color
        }
        Write-Host (" Kept: {0}/{1} features" -f $this.SelectedIndices.Length, $this.SelectedMask.Length) -ForegroundColor Cyan
        Write-Host ""
    }
}

# Compute Pearson correlation between two feature vectors
function Get-Correlation {
    <#
    .SYNOPSIS
        Pearson correlation coefficient of two numeric vectors.
    .PARAMETER a
        First vector; its length determines how many pairs are read.
    .PARAMETER b
        Second vector, read at the same positions as a.
    .OUTPUTS
        The coefficient, or 0.0 when the denominator
        sqrt(sum(da^2) * sum(db^2)) is not greater than 1e-10
        (for example when one vector is constant).
    #>
    param([double[]]$a, [double[]]$b)

    $count = $a.Length
    $avgA  = ($a | Measure-Object -Average).Average
    $avgB  = ($b | Measure-Object -Average).Average

    $crossSum = 0.0
    $sqSumA   = 0.0
    $sqSumB   = 0.0
    $i = 0
    while ($i -lt $count) {
        $devA = $a[$i] - $avgA
        $devB = $b[$i] - $avgB
        $crossSum += $devA * $devB
        $sqSumA   += $devA * $devA
        $sqSumB   += $devB * $devB
        $i++
    }

    $scale = [Math]::Sqrt($sqSumA * $sqSumB)
    if ($scale -gt 1e-10) {
        return $crossSum / $scale
    }
    return 0.0
}

function Get-FeatureCorrelations {
    <#
    .SYNOPSIS
        Prints and returns the correlation of every feature column with
        the target.
    .PARAMETER X
        Feature matrix, one inner array per sample.
    .PARAMETER y
        Target values, one per sample.
    .PARAMETER featureNames
        Display names; a missing name falls back to "f<index>".
    .OUTPUTS
        One hashtable per feature with the keys Name, Correlation and
        AbsCorr.
    #>
    param([double[][]]$X, [double[]]$y, [string[]]$featureNames)

    $columnCount = $X[0].Length

    Write-Host ""
    Write-Host "📈 Feature-Target Correlations:" -ForegroundColor Green
    Write-Host ""

    $collected = [System.Collections.ArrayList]::new()
    for ($f = 0; $f -lt $columnCount; $f++) {
        $column = foreach ($sample in $X) { $sample[$f] }
        $corr   = Get-Correlation -a $column -b $y

        $name = "f$f"
        if ($f -lt $featureNames.Length) { $name = $featureNames[$f] }

        $abs = [Math]::Abs($corr)
        $bar = "█" * [int]($abs * 20)

        $color = "White"
        if ($abs -gt 0.7)     { $color = "Green" }
        elseif ($abs -gt 0.4) { $color = "Yellow" }

        Write-Host (" {0,-15} {1,7:F4} {2}" -f $name, $corr, $bar) -ForegroundColor $color
        [void]$collected.Add(@{ Name=$name; Correlation=$corr; AbsCorr=$abs })
    }

    Write-Host ""
    Write-Host " Green=strong (>0.7), Yellow=moderate (>0.4), White=weak" -ForegroundColor DarkGray
    Write-Host ""
    return $collected.ToArray()
}

# ============================================================
# PCA - PRINCIPAL COMPONENT ANALYSIS
# ============================================================
# TEACHING NOTE: PCA finds the directions of MAXIMUM VARIANCE.
# Imagine 3D data shaped like a flat pancake - most variation
# is in 2D, so we can represent it in 2D without losing much!
#
# How it works:
#   1. Center data (subtract mean)
#   2. Find eigenvectors of covariance matrix
#      (eigenvectors = directions of maximum variance)
#   3. Project data onto top k eigenvectors
#
# Result: fewer dimensions, most information preserved!
# Explained variance tells us how much info we kept.
# ============================================================
# Principal component analysis via power iteration + deflation.
# Fit learns the column means, the top-k principal axes and the share of
# the TOTAL variance each axis explains; Transform projects rows onto
# those axes.
class PCA {
    [int]        $NComponents
    [double[][]] $Components               # eigenvectors (principal axes)
    [double[]]   $ExplainedVarianceRatio   # eigenvalue / total variance, per component
    [double[]]   $Mean                     # per-column mean learned by Fit
    [bool]       $IsFitted = $false

    PCA([int]$nComponents) {
        $this.NComponents = $nComponents
    }

    # Compute covariance matrix (divisor n - 1).
    # X is used as-is, so the caller must pass centered data.
    hidden [double[][]] CovMatrix([double[][]]$X) {
        $n   = $X.Length
        $nF  = $X[0].Length
        $cov = [double[][]]::new($nF)
        for ($i = 0; $i -lt $nF; $i++) { $cov[$i] = [double[]]::new($nF) }

        for ($i = 0; $i -lt $nF; $i++) {
            for ($j = $i; $j -lt $nF; $j++) {
                $sum = 0.0
                for ($k = 0; $k -lt $n; $k++) { $sum += $X[$k][$i] * $X[$k][$j] }
                $val = $sum / ($n - 1)
                $cov[$i][$j] = $val     # the matrix is symmetric:
                $cov[$j][$i] = $val     # fill both halves at once
            }
        }
        return $cov
    }

    # Power iteration to find dominant eigenvector.
    # The start vector comes from a fixed seed (42), so repeated fits on
    # the same data give the same result.
    hidden [double[]] PowerIteration([double[][]]$cov, [int]$maxIter) {
        $n   = $cov.Length
        $rng = [System.Random]::new(42)
        $vec = [double[]]::new($n)
        for ($i = 0; $i -lt $n; $i++) { $vec[$i] = $rng.NextDouble() }

        for ($iter = 0; $iter -lt $maxIter; $iter++) {
            $newVec = [double[]]::new($n)
            for ($i = 0; $i -lt $n; $i++) {
                for ($j = 0; $j -lt $n; $j++) {
                    $newVec[$i] += $cov[$i][$j] * $vec[$j]
                }
            }
            # Normalize (skipped when the product is numerically zero)
            $norm = 0.0
            foreach ($v in $newVec) { $norm += $v * $v }
            $norm = [Math]::Sqrt($norm)
            if ($norm -gt 1e-10) {
                for ($i = 0; $i -lt $n; $i++) { $newVec[$i] /= $norm }
            }
            $vec = $newVec
        }
        return $vec
    }

    # Returns v^T * cov * v, used as the eigenvalue estimate for v.
    hidden [double] QuadraticForm([double[][]]$cov, [double[]]$vec) {
        $n     = $cov.Length
        $total = 0.0
        for ($i = 0; $i -lt $n; $i++) {
            $rowDot = 0.0
            for ($j = 0; $j -lt $n; $j++) { $rowDot += $cov[$i][$j] * $vec[$j] }
            $total += $vec[$i] * $rowDot
        }
        return $total
    }

    # Deflate covariance matrix (remove component of found eigenvector):
    # returns cov - lambda * v * v^T as a new matrix.
    hidden [double[][]] Deflate([double[][]]$cov, [double[]]$eigenvec) {
        $n      = $cov.Length
        $lambda = $this.QuadraticForm($cov, $eigenvec)
        $newCov = [double[][]]::new($n)
        for ($i = 0; $i -lt $n; $i++) {
            $row = [double[]]::new($n)
            for ($j = 0; $j -lt $n; $j++) {
                $row[$j] = $cov[$i][$j] - $lambda * $eigenvec[$i] * $eigenvec[$j]
            }
            $newCov[$i] = $row
        }
        return $newCov
    }

    # Learns Mean, Components and ExplainedVarianceRatio from X.
    # Throws ArgumentException when X has fewer than two rows, because the
    # covariance divisor (n - 1) would be zero.
    [void] Fit([double[][]]$X) {
        $n = $X.Length
        if ($n -lt 2) {
            throw [System.ArgumentException]::new("PCA requires at least two samples.")
        }
        $nF = $X[0].Length

        # Center data
        $means = [double[]]::new($nF)
        for ($f = 0; $f -lt $nF; $f++) {
            $colSum = 0.0
            for ($r = 0; $r -lt $n; $r++) { $colSum += $X[$r][$f] }
            $means[$f] = $colSum / $n
        }
        $this.Mean = $means

        $Xc = [double[][]]::new($n)
        for ($r = 0; $r -lt $n; $r++) {
            $centered = [double[]]::new($nF)
            for ($f = 0; $f -lt $nF; $f++) { $centered[$f] = $X[$r][$f] - $means[$f] }
            $Xc[$r] = $centered
        }

        # Covariance matrix
        $cov = $this.CovMatrix($Xc)

        # Total variance = sum of the per-feature variances (the diagonal).
        # Ratios are taken against this, not against the sum of the k
        # eigenvalues found below, so dropping components shows up as a
        # total below 100%.
        $totalVar = 0.0
        for ($i = 0; $i -lt $nF; $i++) { $totalVar += $cov[$i][$i] }

        # Find top k eigenvectors via power iteration + deflation
        $k           = [Math]::Max(0, [Math]::Min($this.NComponents, $nF))
        $axes        = [double[][]]::new($k)
        $eigenvalues = [double[]]::new($k)
        $currentCov  = $cov
        for ($c = 0; $c -lt $k; $c++) {
            $evec            = $this.PowerIteration($currentCov, 100)
            $axes[$c]        = $evec
            $eigenvalues[$c] = [Math]::Abs($this.QuadraticForm($currentCov, $evec))
            $currentCov      = $this.Deflate($currentCov, $evec)
        }
        $this.Components = $axes

        # Explained variance ratio
        $ratios = [double[]]::new($k)
        for ($c = 0; $c -lt $k; $c++) {
            $ratios[$c] = if ($totalVar -gt 0) { $eigenvalues[$c] / $totalVar } else { 0.0 }
        }
        $this.ExplainedVarianceRatio = $ratios
        $this.IsFitted = $true
    }

    # Centers each row with the learned means and projects it onto the
    # learned components. Throws InvalidOperationException before Fit.
    [double[][]] Transform([double[][]]$X) {
        if (-not $this.IsFitted) {
            throw [System.InvalidOperationException]::new("PCA must be fitted before Transform is called.")
        }
        $nComp  = $this.Components.Length
        $result = [double[][]]::new($X.Length)
        for ($r = 0; $r -lt $X.Length; $r++) {
            $row      = $X[$r]
            $centered = [double[]]::new($row.Length)
            for ($f = 0; $f -lt $row.Length; $f++) { $centered[$f] = $row[$f] - $this.Mean[$f] }

            $projected = [double[]]::new($nComp)
            for ($c = 0; $c -lt $nComp; $c++) {
                $dot = 0.0
                for ($f = 0; $f -lt $centered.Length; $f++) {
                    $dot += $centered[$f] * $this.Components[$c][$f]
                }
                $projected[$c] = $dot
            }
            $result[$r] = $projected
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    # Prints per-component and cumulative explained variance to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ PCA Summary ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Components: {0,-24}║" -f $this.NComponents) -ForegroundColor Yellow
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        $cumulative = 0.0
        for ($c = 0; $c -lt $this.ExplainedVarianceRatio.Length; $c++) {
            $evr = [Math]::Round($this.ExplainedVarianceRatio[$c], 4)
            $cumulative += $evr
            $bar = "█" * [int]($evr * 30)
            Write-Host ("║ PC{0}: {1,6:F1}% cum={2,5:F1}% {3,-10}║" -f ($c+1), ($evr*100), ($cumulative*100), $bar) -ForegroundColor White
        }
        Write-Host ("║ Total explained: {0,5:F1}% ║" -f ($cumulative*100)) -ForegroundColor Green
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# TRANSFORMER PIPELINE
# ============================================================
# TEACHING NOTE: A pipeline chains transformers so you don't
# have to manually call each one. It also prevents DATA LEAKAGE:
# fitting scalers on test data would cheat!
# Pipeline ensures: fit on train, transform both train and test.
# ============================================================
# Ordered chain of transformers. Every step object must offer
# FitTransform(X) and Transform(X); the output of one step is the input
# of the next.
class TransformerPipeline {
    [System.Collections.ArrayList] $Steps    # hashtables: @{ Name; Transformer }
    [bool]                         $IsFitted = $false

    TransformerPipeline() {
        $this.Steps = [System.Collections.ArrayList]::new()
    }

    # Appends a named step to the end of the chain.
    [void] Add([string]$name, [object]$transformer) {
        $entry = @{ Name=$name; Transformer=$transformer }
        [void]$this.Steps.Add($entry)
    }

    # Calls FitTransform on every step in order (announcing each step on
    # the host) and returns the output of the last step.
    [double[][]] FitTransform([double[][]]$X) {
        $data = $X
        for ($position = 0; $position -lt $this.Steps.Count; $position++) {
            $step = $this.Steps[$position]
            Write-Host (" ⚙️ $($step.Name)...") -ForegroundColor DarkGray
            $data = $step.Transformer.FitTransform($data)
        }
        $this.IsFitted = $true
        return $data
    }

    # Calls Transform on every step in order, without fitting anything.
    [double[][]] Transform([double[][]]$X) {
        $data = $X
        for ($position = 0; $position -lt $this.Steps.Count; $position++) {
            $data = $this.Steps[$position].Transformer.Transform($data)
        }
        return $data
    }

    # Lists the steps with their 1-based position and transformer type.
    [void] PrintSteps() {
        Write-Host ""
        Write-Host "🔧 Transformer Pipeline:" -ForegroundColor Green
        for ($position = 0; $position -lt $this.Steps.Count; $position++) {
            $entry = $this.Steps[$position]
            Write-Host (" Step {0}: {1} [{2}]" -f ($position + 1), $entry.Name, $entry.Transformer.GetType().Name) -ForegroundColor White
        }
        Write-Host ""
    }
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- Polynomial Features ---
# 2. $X = @(@(2.0, 3.0), @(4.0, 5.0), @(1.0, 2.0))
#    $poly = [PolynomialFeatures]::new(2)
#    $Xpoly = $poly.FitTransform($X, @("size","age"))
#    $poly.PrintSummary()
#
# --- Interaction Terms ---
# 3. $inter = [InteractionFeatures]::new()
#    $Xint = $inter.FitTransform($X, @("size","age"))
#    $inter.PrintSummary()
#
# --- Feature Binning ---
# 4. $data = Get-VBAFTreeDataset -Name "HousePrice"   # from Trees module
#    $binner = [FeatureBinner]::new(4, "quantile")
#    $binner.Fit($data.X)
#    $binner.PrintBins($data.Features)
#    $Xbinned = $binner.Transform($data.X)
#
# --- Feature Correlations ---
# 5. Get-FeatureCorrelations -X $data.X -y $data.yRaw -featureNames $data.Features
#
# --- Variance Selection ---
# 6. $vs = [VarianceSelector]::new(0.5)
#    $Xsel = $vs.FitTransform($data.X)
#    $vs.PrintSummary($data.Features)
#
# --- PCA ---
# 7. $pca = [PCA]::new(2)
#    $Xpca = $pca.FitTransform($data.X)
#    $pca.PrintSummary()
#    Write-Host "Shape: $($data.X[0].Length) features -> $($Xpca[0].Length) components"
#
# --- Full Pipeline ---
# 8. $pipe = [TransformerPipeline]::new()
#    $pipe.Add("Imputer", [MissingValueImputer]::new("median"))   # needs DataPipeline
#    $pipe.Add("Scaler", [RobustScaler]::new())                   # needs DataPipeline
#    $pipe.Add("Poly", [PolynomialFeatures]::new(2))
#    $pipe.PrintSteps()
#    $Xout = $pipe.FitTransform($data.X)
# ============================================================

# Load banner: what this script provides, followed by a quick-start snippet.
Write-Host "📦 VBAF.ML.FeatureEngineering.ps1 loaded" -ForegroundColor Green

$inventoryLines = @(
    " Classes : PolynomialFeatures",
    " InteractionFeatures",
    " FeatureBinner",
    " VarianceSelector",
    " PCA",
    " TransformerPipeline",
    " Functions : Get-Correlation",
    " Get-FeatureCorrelations"
)
foreach ($inventoryLine in $inventoryLines) {
    Write-Host $inventoryLine -ForegroundColor Cyan
}
Write-Host ""

Write-Host " Quick start:" -ForegroundColor Yellow
$quickStartLines = @(
    ' $X = @(@(2.0,3.0),@(4.0,5.0),@(1.0,2.0))',
    ' $poly = [PolynomialFeatures]::new(2)',
    ' $Xp = $poly.FitTransform($X, @("size","age"))',
    ' $poly.PrintSummary()'
)
foreach ($quickStartLine in $quickStartLines) {
    Write-Host $quickStartLine -ForegroundColor White
}
Write-Host ""