# VBAF.ML.FeatureEngineering.ps1
# BUGFIX: this header line was previously a bare, unquoted word; PowerShell
# parses that in command mode and fails at load time with
# "The term 'VBAF.ML.FeatureEngineering.ps1' is not recognized".

#Requires -Version 5.1
<#
.SYNOPSIS
    Feature Engineering - Create Better Features for ML
.DESCRIPTION
    Implements feature engineering from scratch.
    Designed as a TEACHING resource - every step explained.
    Features included:
      - Polynomial features : x, x^2, x^3, x1*x2 combinations
      - Interaction terms : explicit pairwise feature products
      - Feature binning : continuous -> discrete buckets
      - Feature selection : variance, correlation, mutual info
      - PCA : dimensionality reduction
      - Pipeline : chain transformers in sequence
    Standalone - no external VBAF dependencies required.
.NOTES
    Part of VBAF - Phase 5 Feature Engineering Module
    PS 5.1 compatible
    Teaching project - why and how of each transformation!
#>

# Folder this script lives in (kept for callers that dot-source this file)
$basePath = $PSScriptRoot

# ============================================================
# TEACHING NOTE: What is Feature Engineering?
# "Better features = better models" - this is often MORE
# important than choosing the right algorithm!
#
# Raw features are what you measure. Engineered features
# capture RELATIONSHIPS and PATTERNS that algorithms can't
# discover on their own.
#
# Example: predicting house price
# Raw : [size, floors]
# Engineered: [size, floors, size^2, size*floors, size_per_floor]
# The model can now find non-linear relationships!
# ============================================================

# ============================================================
# POLYNOMIAL FEATURES
# ============================================================
# TEACHING NOTE: Linear models can only find straight-line
# relationships. Polynomial features let linear models fit CURVES!
#
# For features [x1, x2] with degree=2:
# Output: [1, x1, x2, x1^2, x1*x2, x2^2]
# The model learns: y = a + b*x1 + c*x2 + d*x1^2 + e*x1*x2 + f*x2^2
#
# WARNING: degree=3 with 10 features -> 286 columns!
# More features = more risk of overfitting!
# ============================================================

class PolynomialFeatures {
    [int]    $Degree
    [bool]   $IncludeBias        # include column of 1s
    [bool]   $InteractionOnly    # only products of distinct features (x1*x2), skip powers (x1^2)
    [int]    $NInputFeatures
    [int]    $NOutputFeatures
    [bool]   $IsFitted = $false
    [string[]] $FeatureNames

    PolynomialFeatures() {
        $this.Degree          = 2
        $this.IncludeBias     = $false
        $this.InteractionOnly = $false
    }

    PolynomialFeatures([int]$degree) {
        $this.Degree          = $degree
        $this.IncludeBias     = $false
        $this.InteractionOnly = $false
    }

    PolynomialFeatures([int]$degree, [bool]$interactionOnly) {
        $this.Degree          = $degree
        $this.IncludeBias     = $false
        $this.InteractionOnly = $interactionOnly
    }

    # Generate all sorted index combinations of length 1..Degree
    # (with repetition unless InteractionOnly), plus the optional bias term.
    # BUGFIX: the old version only ever emitted PAIRS, and re-added the same
    # pairs once per extra degree — so Degree=3 produced duplicate columns
    # and never produced cubic terms (x^3, x1*x2*x3, ...).
    hidden [System.Collections.ArrayList] GetCombinations([int]$nFeatures) {
        $combos = [System.Collections.ArrayList]::new()

        if ($this.IncludeBias) { $combos.Add(@()) | Out-Null }

        # Degree 1: original features (also the seed level for higher degrees)
        $prevLevel = [System.Collections.ArrayList]::new()
        for ($i = 0; $i -lt $nFeatures; $i++) {
            $combos.Add(@($i))    | Out-Null
            $prevLevel.Add(@($i)) | Out-Null
        }

        # Degree d: extend every (d-1)-combo with an index >= its last
        # element (strictly greater when InteractionOnly, so no repeats).
        # Keeping combos sorted avoids emitting x1*x2 and x2*x1 twice.
        for ($d = 2; $d -le $this.Degree; $d++) {
            $curLevel = [System.Collections.ArrayList]::new()
            foreach ($base in $prevLevel) {
                $start = $base[-1]
                if ($this.InteractionOnly) { $start++ }
                for ($j = $start; $j -lt $nFeatures; $j++) {
                    $combo = $base + @($j)
                    $curLevel.Add($combo) | Out-Null
                    $combos.Add($combo)   | Out-Null
                }
            }
            $prevLevel = $curLevel
        }
        return $combos
    }

    # Learn the output layout and build human-readable feature names
    # (e.g. "size^2", "size*age") from the supplied input names.
    [void] Fit([double[][]]$X, [string[]]$featureNames) {
        $this.NInputFeatures = $X[0].Length
        $combos              = $this.GetCombinations($this.NInputFeatures)
        $this.NOutputFeatures = $combos.Count

        # Build feature names: collapse runs of equal indices into powers
        $names = [System.Collections.ArrayList]::new()
        foreach ($combo in $combos) {
            if ($combo.Length -eq 0) { $names.Add("1") | Out-Null; continue }
            $parts = @()
            $prev  = -1; $exp = 1
            for ($k = 0; $k -lt $combo.Length; $k++) {
                $fi = $combo[$k]
                if ($fi -eq $prev) { $exp++ } else {
                    if ($prev -ge 0) {
                        $pfn = if ($prev -lt $featureNames.Length) { $featureNames[$prev] } else { "f$prev" }
                        $parts += if ($exp -gt 1) { "${pfn}^$exp" } else { $pfn }
                    }
                    $prev = $fi; $exp = 1
                }
            }
            # Flush the final run — the loop above only flushes on a change
            $fi  = $combo[-1]
            $pfn = if ($fi -lt $featureNames.Length) { $featureNames[$fi] } else { "f$fi" }
            $parts += if ($exp -gt 1) { "${pfn}^$exp" } else { $pfn }
            $names.Add($parts -join "*") | Out-Null
        }
        $this.FeatureNames = $names.ToArray()
        $this.IsFitted     = $true
    }

    # Overload: generate default names x0, x1, ...
    [void] Fit([double[][]]$X) {
        $names = @(); for ($i = 0; $i -lt $X[0].Length; $i++) { $names += "x$i" }
        $this.Fit($X, $names)
    }

    # Expand each row into its polynomial feature vector (one product per combo).
    [double[][]] Transform([double[][]]$X) {
        $combos = $this.GetCombinations($X[0].Length)
        $result = @()
        foreach ($row in $X) {
            $newRow = @(0.0) * $combos.Count
            for ($c = 0; $c -lt $combos.Count; $c++) {
                $combo = $combos[$c]
                if ($combo.Length -eq 0) { $newRow[$c] = 1.0; continue }
                $val = 1.0
                foreach ($fi in $combo) { $val *= $row[$fi] }
                $newRow[$c] = $val
            }
            $result += ,$newRow
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    [double[][]] FitTransform([double[][]]$X, [string[]]$featureNames) {
        $this.Fit($X, $featureNames)
        return $this.Transform($X)
    }

    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Polynomial Features ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Degree : {0,-18}║" -f $this.Degree)             -ForegroundColor Yellow
        Write-Host ("║ Interaction only: {0,-18}║" -f $this.InteractionOnly)    -ForegroundColor Yellow
        Write-Host ("║ Input features : {0,-18}║" -f $this.NInputFeatures)     -ForegroundColor White
        Write-Host ("║ Output features : {0,-18}║" -f $this.NOutputFeatures)    -ForegroundColor Green
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        foreach ($name in $this.FeatureNames) {
            Write-Host ("║ {0,-36}║" -f $name) -ForegroundColor White
        }
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# INTERACTION TERMS (explicit, readable)
# ============================================================
# TEACHING NOTE: Interactions capture COMBINED effects.
# e.g. size*age: big old house behaves differently than
# big new house or small old house.
# More interpretable than full polynomial expansion!
# ============================================================

class InteractionFeatures {
    [string[]] $FeatureNames   # output names: originals followed by "a*b" products
    [bool]     $IsFitted = $false

    InteractionFeatures() {}

    # Append every pairwise product x_i * x_j (i < j) to each row.
    # Output width is nF + nF*(nF-1)/2.
    # (Cleanup: removed a dead store of FeatureNames that was immediately
    # overwritten, and an unused row-count local.)
    [double[][]] FitTransform([double[][]]$X, [string[]]$featureNames) {
        $nF     = $X[0].Length
        $result = @()

        foreach ($row in $X) {
            $extras = @()
            for ($i = 0; $i -lt $nF; $i++) {
                for ($j = $i + 1; $j -lt $nF; $j++) {
                    $extras += $row[$i] * $row[$j]
                }
            }
            # Copy originals first, then the interaction products
            $newRow = @(0.0) * ($nF + $extras.Length)
            for ($k = 0; $k -lt $nF; $k++) { $newRow[$k] = $row[$k] }
            for ($k = 0; $k -lt $extras.Length; $k++) { $newRow[$nF + $k] = $extras[$k] }
            $result += ,$newRow
        }

        # Build output feature names in the same order as the columns
        $allNames = [System.Collections.ArrayList]::new()
        foreach ($n2 in $featureNames) { $allNames.Add($n2) | Out-Null }
        for ($i = 0; $i -lt $nF; $i++) {
            for ($j = $i + 1; $j -lt $nF; $j++) {
                $allNames.Add("$($featureNames[$i])*$($featureNames[$j])") | Out-Null
            }
        }
        $this.FeatureNames = $allNames.ToArray()
        $this.IsFitted     = $true
        return $result
    }

    [void] PrintSummary() {
        Write-Host ""
        Write-Host "🔗 Interaction Features:" -ForegroundColor Green
        foreach ($name in $this.FeatureNames) {
            # Highlight derived (product) features
            $color = if ($name -match '\*') { "Yellow" } else { "White" }
            Write-Host (" {0}" -f $name) -ForegroundColor $color
        }
        Write-Host ""
    }
}

# ============================================================
# FEATURE BINNING
# ============================================================
# TEACHING NOTE: Binning converts continuous numbers to categories.
# Why bin?
# - Makes non-linear patterns easier to learn
# - Reduces sensitivity to small measurement errors
# - "Age 25-35" might matter more than exact age
#
# Two strategies:
# Uniform : equal-width bins (e.g. 0-10, 10-20, 20-30)
# Quantile : equal-frequency bins (same number of points each)
# More robust when data is skewed!
# ============================================================

class FeatureBinner {
    [string]   $Strategy    # "uniform" (equal width) or "quantile" (equal frequency)
    [int]      $NBins
    [double[][]] $BinEdges  # one array of NBins+1 edges per feature
    [bool]     $IsFitted = $false

    FeatureBinner([int]$nBins) {
        $this.NBins    = $nBins
        $this.Strategy = "quantile"
    }

    FeatureBinner([int]$nBins, [string]$strategy) {
        $this.NBins    = $nBins
        $this.Strategy = $strategy
    }

    # Linear-interpolation percentile (p in 0..100) of an already-sorted vector.
    hidden [double] Percentile([double[]]$sorted, [double]$p) {
        $idx = $p / 100.0 * ($sorted.Length - 1)
        $lo  = [int][Math]::Floor($idx)
        $hi  = [int][Math]::Ceiling($idx)
        if ($lo -eq $hi) { return $sorted[$lo] }
        return $sorted[$lo] + ($idx - $lo) * ($sorted[$hi] - $sorted[$lo])
    }

    # Learn NBins+1 edges per feature: equal-width steps over [min,max]
    # for "uniform", evenly spaced quantiles otherwise.
    [void] Fit([double[][]]$X) {
        $nFeatures      = $X[0].Length
        $this.BinEdges  = @()

        for ($f = 0; $f -lt $nFeatures; $f++) {
            $vals = ($X | ForEach-Object { $_[$f] }) | Sort-Object

            if ($this.Strategy -eq "uniform") {
                $minV  = $vals[0]
                $maxV  = $vals[-1]
                $step  = ($maxV - $minV) / $this.NBins
                $edges = @($minV)
                for ($b = 1; $b -le $this.NBins; $b++) {
                    $edges += $minV + $b * $step
                }
            } else {
                # Quantile edges: same number of samples between neighbors
                $edges = @()
                for ($b = 0; $b -le $this.NBins; $b++) {
                    $p = $b * 100.0 / $this.NBins
                    $edges += $this.Percentile($vals, $p)
                }
            }
            $this.BinEdges += ,$edges
        }
        $this.IsFitted = $true
    }

    # Map each value to a bin index 0..NBins-1. A value lands in bin b-1 when
    # value <= edges[b] (bins are right-closed); anything above the last edge
    # falls into the final bin.
    [double[][]] Transform([double[][]]$X) {
        $result = @()
        foreach ($row in $X) {
            $binned = @(0.0) * $row.Length
            for ($f = 0; $f -lt $row.Length; $f++) {
                $edges  = $this.BinEdges[$f]
                $binIdx = $this.NBins - 1  # default to last bin
                for ($b = 1; $b -lt $edges.Length; $b++) {
                    if ($row[$f] -le $edges[$b]) { $binIdx = $b - 1; break }
                }
                $binned[$f] = $binIdx
            }
            $result += ,$binned
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    [void] PrintBins([string[]]$featureNames) {
        Write-Host ""
        Write-Host "🗂️ Feature Bins ($($this.Strategy), k=$($this.NBins)):" -ForegroundColor Green
        for ($f = 0; $f -lt $this.BinEdges.Length; $f++) {
            $name  = if ($f -lt $featureNames.Length) { $featureNames[$f] } else { "f$f" }
            $edges = $this.BinEdges[$f]
            Write-Host (" {0,-14}:" -f $name) -ForegroundColor Cyan -NoNewline
            for ($b = 0; $b -lt $this.NBins; $b++) {
                # BUGFIX: Transform assigns via "value <= upper edge", so bins
                # are right-closed — print (lo,hi] instead of the old [lo,hi).
                Write-Host (" ({0:F1},{1:F1}]" -f $edges[$b], $edges[$b+1]) -ForegroundColor White -NoNewline
            }
            Write-Host ""
        }
        Write-Host ""
    }
}

# ============================================================
# FEATURE SELECTION
# ============================================================
# TEACHING NOTE: More features is NOT always better!
# Irrelevant features add noise and slow learning.
#
# Three simple selection methods:
# Variance threshold : remove features with low variance
# (if a feature is nearly constant, it carries no info!)
# Correlation filter : remove features highly correlated
# with each other (they carry the same info - redundant!)
# Mutual Information : how much does each feature tell us
# about the target? Higher = more useful.
# ============================================================

class VarianceSelector {
    [double]   $Threshold        # minimum (population) variance a feature needs to survive
    [bool[]]   $SelectedMask     # per-feature keep/drop flag, in input order
    [int[]]    $SelectedIndices  # indices of kept features
    [bool]     $IsFitted = $false

    VarianceSelector([double]$threshold) { $this.Threshold = $threshold }

    # Measure each feature column's variance and mark the keepers.
    [void] Fit([double[][]]$X) {
        $featureCount      = $X[0].Length
        $this.SelectedMask = @($false) * $featureCount
        $keep              = [System.Collections.ArrayList]::new()

        for ($col = 0; $col -lt $featureCount; $col++) {
            # Extract this column from the row-major matrix
            $column = @($X | ForEach-Object { $_[$col] })
            $avg    = ($column | Measure-Object -Average).Average

            $squares = 0.0
            foreach ($v in $column) {
                $delta    = $v - $avg
                $squares += $delta * $delta
            }

            # Population variance (divide by n, matching the fit data)
            if (($squares / $column.Count) -ge $this.Threshold) {
                $this.SelectedMask[$col] = $true
                [void]$keep.Add($col)
            }
        }
        $this.SelectedIndices = $keep.ToArray()
        $this.IsFitted        = $true
    }

    # Project each row down to the selected columns only.
    [double[][]] Transform([double[][]]$X) {
        $out = @()
        foreach ($row in $X) {
            $reduced = @(foreach ($idx in $this.SelectedIndices) { $row[$idx] })
            $out += ,$reduced
        }
        return $out
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    # Pretty-print which features survived the variance filter.
    [void] PrintSummary([string[]]$featureNames) {
        Write-Host ""
        Write-Host "🎯 Variance Feature Selection (threshold=$($this.Threshold)):" -ForegroundColor Green
        for ($col = 0; $col -lt $this.SelectedMask.Length; $col++) {
            $label = if ($col -lt $featureNames.Length) { $featureNames[$col] } else { "f$col" }
            if ($this.SelectedMask[$col]) {
                Write-Host (" ✅ {0,-20}" -f $label) -ForegroundColor "White"
            } else {
                Write-Host (" ❌ {0,-20}" -f $label) -ForegroundColor "DarkGray"
            }
        }
        Write-Host (" Kept: {0}/{1} features" -f $this.SelectedIndices.Length, $this.SelectedMask.Length) -ForegroundColor Cyan
        Write-Host ""
    }
}

# Pearson correlation coefficient between two equal-length vectors.
# Returns 0.0 when either vector is (near-)constant to avoid divide-by-zero.
function Get-Correlation {
    param([double[]]$a, [double[]]$b)

    $avgA = ($a | Measure-Object -Average).Average
    $avgB = ($b | Measure-Object -Average).Average

    # Accumulate covariance and the two squared deviations in one pass
    $cross = 0.0
    $devA  = 0.0
    $devB  = 0.0
    for ($i = 0; $i -lt $a.Length; $i++) {
        $dA = $a[$i] - $avgA
        $dB = $b[$i] - $avgB
        $cross += $dA * $dB
        $devA  += $dA * $dA
        $devB  += $dB * $dB
    }

    $scale = [Math]::Sqrt($devA * $devB)
    if ($scale -gt 1e-10) { return $cross / $scale }
    return 0.0
}

# Print and return the Pearson correlation of every feature against the
# target y. Returned hashtables carry Name, Correlation and AbsCorr keys.
function Get-FeatureCorrelations {
    param([double[][]]$X, [double[]]$y, [string[]]$featureNames)

    $featureCount = $X[0].Length
    Write-Host ""
    Write-Host "📈 Feature-Target Correlations:" -ForegroundColor Green
    Write-Host ""

    $rows = @()
    for ($col = 0; $col -lt $featureCount; $col++) {
        $column = @($X | ForEach-Object { $_[$col] })
        $r      = Get-Correlation -a $column -b $y
        $label  = if ($col -lt $featureNames.Length) { $featureNames[$col] } else { "f$col" }

        # Bar length and color track the correlation STRENGTH, not its sign
        $strength = [Math]::Abs($r)
        $gauge    = "█" * [int]($strength * 20)
        $tint     = if ($strength -gt 0.7) { "Green" } elseif ($strength -gt 0.4) { "Yellow" } else { "White" }
        Write-Host (" {0,-15} {1,7:F4} {2}" -f $label, $r, $gauge) -ForegroundColor $tint
        $rows += @{ Name=$label; Correlation=$r; AbsCorr=$strength }
    }
    Write-Host ""
    Write-Host " Green=strong (>0.7), Yellow=moderate (>0.4), White=weak" -ForegroundColor DarkGray
    Write-Host ""
    return $rows
}

# ============================================================
# PCA - PRINCIPAL COMPONENT ANALYSIS
# ============================================================
# TEACHING NOTE: PCA finds the directions of MAXIMUM VARIANCE.
# Imagine 3D data shaped like a flat pancake - most variation
# is in 2D, so we can represent it in 2D without losing much!
#
# How it works:
# 1. Center data (subtract mean)
# 2. Find eigenvectors of covariance matrix
# (eigenvectors = directions of maximum variance)
# 3. Project data onto top k eigenvectors
#
# Result: fewer dimensions, most information preserved!
# Explained variance tells us how much info we kept.
# ============================================================

class PCA {
    [int]      $NComponents
    [double[][]] $Components              # eigenvectors (principal axes), one per component
    [double[]] $ExplainedVarianceRatio    # per-component fraction of TOTAL variance
    [double[]] $Mean                      # per-feature mean used for centering
    [bool]     $IsFitted = $false

    PCA([int]$nComponents) { $this.NComponents = $nComponents }

    # Sample covariance matrix of already-centered data (divisor n-1).
    hidden [double[][]] CovMatrix([double[][]]$X) {
        $n  = $X.Length
        $nF = $X[0].Length
        $cov = @()
        for ($i = 0; $i -lt $nF; $i++) {
            $row = @(0.0) * $nF
            $cov += ,$row
        }
        # Symmetric matrix: compute the upper triangle, mirror the rest
        for ($i = 0; $i -lt $nF; $i++) {
            for ($j = $i; $j -lt $nF; $j++) {
                $sum = 0.0
                for ($k = 0; $k -lt $n; $k++) {
                    $sum += $X[$k][$i] * $X[$k][$j]
                }
                $val          = $sum / ($n - 1)
                $cov[$i][$j]  = $val
                $cov[$j][$i]  = $val
            }
        }
        return $cov
    }

    # Power iteration: repeatedly multiply a (seeded-random) vector by the
    # matrix and renormalize; converges to the dominant eigenvector.
    hidden [double[]] PowerIteration([double[][]]$cov, [int]$maxIter) {
        $n   = $cov.Length
        $rng = [System.Random]::new(42)   # fixed seed -> deterministic result
        $vec = @(0.0) * $n
        for ($i = 0; $i -lt $n; $i++) { $vec[$i] = $rng.NextDouble() }

        for ($iter = 0; $iter -lt $maxIter; $iter++) {
            $newVec = @(0.0) * $n
            for ($i = 0; $i -lt $n; $i++) {
                for ($j = 0; $j -lt $n; $j++) {
                    $newVec[$i] += $cov[$i][$j] * $vec[$j]
                }
            }
            # Normalize to unit length (skip if numerically zero)
            $norm = 0.0
            foreach ($v in $newVec) { $norm += $v * $v }
            $norm = [Math]::Sqrt($norm)
            if ($norm -gt 1e-10) {
                for ($i = 0; $i -lt $n; $i++) { $newVec[$i] /= $norm }
            }
            $vec = $newVec
        }
        return $vec
    }

    # Rayleigh quotient v^T * M * v — the eigenvalue estimate for unit
    # eigenvector v. (Extracted: Fit and Deflate previously duplicated this.)
    hidden [double] Eigenvalue([double[][]]$m, [double[]]$v) {
        $n   = $m.Length
        $lam = 0.0
        for ($i = 0; $i -lt $n; $i++) {
            $mv = 0.0
            for ($j = 0; $j -lt $n; $j++) { $mv += $m[$i][$j] * $v[$j] }
            $lam += $v[$i] * $mv
        }
        return $lam
    }

    # Deflation: subtract lambda * v * v^T so the next power iteration
    # finds the next-largest eigenvector.
    hidden [double[][]] Deflate([double[][]]$cov, [double[]]$eigenvec) {
        $n      = $cov.Length
        $lambda = $this.Eigenvalue($cov, $eigenvec)
        $newCov = @()
        for ($i = 0; $i -lt $n; $i++) {
            $row = @(0.0) * $n
            for ($j = 0; $j -lt $n; $j++) {
                $row[$j] = $cov[$i][$j] - $lambda * $eigenvec[$i] * $eigenvec[$j]
            }
            $newCov += ,$row
        }
        return $newCov
    }

    [void] Fit([double[][]]$X) {
        $nF = $X[0].Length

        # 1. Center data (subtract per-feature mean)
        $this.Mean = @(0.0) * $nF
        for ($f = 0; $f -lt $nF; $f++) {
            $vals = $X | ForEach-Object { $_[$f] }
            $this.Mean[$f] = ($vals | Measure-Object -Average).Average
        }

        $Xc = @()
        foreach ($row in $X) {
            $centered = @(0.0) * $nF
            for ($f = 0; $f -lt $nF; $f++) { $centered[$f] = $row[$f] - $this.Mean[$f] }
            $Xc += ,$centered
        }

        # 2. Covariance matrix
        $cov = $this.CovMatrix($Xc)

        # BUGFIX: explained variance must be measured against the TOTAL
        # variance (= trace of the covariance matrix). The old code divided
        # by the sum of only the k extracted eigenvalues, so the ratios
        # always summed to 100% no matter how much variance was discarded.
        $totalVar = 0.0
        for ($i = 0; $i -lt $nF; $i++) { $totalVar += $cov[$i][$i] }

        # 3. Top-k eigenvectors via power iteration + deflation
        $this.Components = @()
        $eigenvalues     = @()
        $currentCov      = $cov

        $k = [Math]::Min($this.NComponents, $nF)
        for ($c = 0; $c -lt $k; $c++) {
            $evec             = $this.PowerIteration($currentCov, 100)
            $this.Components += ,$evec
            $eigenvalues     += [Math]::Abs($this.Eigenvalue($currentCov, $evec))
            $currentCov       = $this.Deflate($currentCov, $evec)
        }

        # 4. Explained variance ratio: fraction of total variance kept
        $this.ExplainedVarianceRatio = @(0.0) * $eigenvalues.Length
        for ($c = 0; $c -lt $eigenvalues.Length; $c++) {
            $this.ExplainedVarianceRatio[$c] = if ($totalVar -gt 0) {
                $eigenvalues[$c] / $totalVar
            } else { 0.0 }
        }

        $this.IsFitted = $true
    }

    # Center each row with the fitted means, then project onto the
    # principal axes (dot product with each component).
    [double[][]] Transform([double[][]]$X) {
        $result = @()
        foreach ($row in $X) {
            $centered = @(0.0) * $row.Length
            for ($f = 0; $f -lt $row.Length; $f++) { $centered[$f] = $row[$f] - $this.Mean[$f] }

            $projected = @(0.0) * $this.Components.Length
            for ($c = 0; $c -lt $this.Components.Length; $c++) {
                $dot = 0.0
                for ($f = 0; $f -lt $centered.Length; $f++) {
                    $dot += $centered[$f] * $this.Components[$c][$f]
                }
                $projected[$c] = $dot
            }
            $result += ,$projected
        }
        return $result
    }

    [double[][]] FitTransform([double[][]]$X) {
        $this.Fit($X)
        return $this.Transform($X)
    }

    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ PCA Summary ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Components: {0,-24}║" -f $this.NComponents) -ForegroundColor Yellow
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        $cumulative = 0.0
        for ($c = 0; $c -lt $this.ExplainedVarianceRatio.Length; $c++) {
            $evr        = [Math]::Round($this.ExplainedVarianceRatio[$c], 4)
            $cumulative += $evr
            $bar        = "█" * [int]($evr * 30)
            Write-Host ("║ PC{0}: {1,6:F1}% cum={2,5:F1}% {3,-10}║" -f
                ($c+1), ($evr*100), ($cumulative*100), $bar) -ForegroundColor White
        }
        Write-Host ("║ Total explained: {0,5:F1}% ║" -f ($cumulative*100)) -ForegroundColor Green
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# TRANSFORMER PIPELINE
# ============================================================
# TEACHING NOTE: A pipeline chains transformers so you don't
# have to manually call each one. It also prevents DATA LEAKAGE:
# fitting scalers on test data would cheat!
# Pipeline ensures: fit on train, transform both train and test.
# ============================================================

class TransformerPipeline {
    [System.Collections.ArrayList] $Steps   # ordered list of @{ Name; Transformer }
    [bool] $IsFitted = $false

    TransformerPipeline() {
        $this.Steps = [System.Collections.ArrayList]::new()
    }

    # Register a named transformer; steps run in insertion order.
    [void] Add([string]$name, [object]$transformer) {
        [void]$this.Steps.Add(@{ Name=$name; Transformer=$transformer })
    }

    # Fit each step on the progressively transformed data and return
    # the final matrix. Use on TRAINING data only.
    [double[][]] FitTransform([double[][]]$X) {
        $data = $X
        foreach ($step in $this.Steps) {
            Write-Host (" ⚙️ $($step.Name)...") -ForegroundColor DarkGray
            $data = $step.Transformer.FitTransform($data)
        }
        $this.IsFitted = $true
        return $data
    }

    # Apply the already-fitted steps without re-fitting — this is what
    # prevents test-set leakage.
    [double[][]] Transform([double[][]]$X) {
        $data = $X
        foreach ($stage in $this.Steps) {
            $data = $stage.Transformer.Transform($data)
        }
        return $data
    }

    # List the configured steps with their transformer types.
    [void] PrintSteps() {
        Write-Host ""
        Write-Host "🔧 Transformer Pipeline:" -ForegroundColor Green
        for ($idx = 0; $idx -lt $this.Steps.Count; $idx++) {
            $stage = $this.Steps[$idx]
            Write-Host (" Step {0}: {1} [{2}]" -f ($idx + 1), $stage.Name, $stage.Transformer.GetType().Name) -ForegroundColor White
        }
        Write-Host ""
    }
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- Polynomial Features ---
# 2. $X = @(@(2.0, 3.0), @(4.0, 5.0), @(1.0, 2.0))
# $poly = [PolynomialFeatures]::new(2)
# $Xpoly = $poly.FitTransform($X, @("size","age"))
# $poly.PrintSummary()
#
# --- Interaction Terms ---
# 3. $inter = [InteractionFeatures]::new()
# $Xint = $inter.FitTransform($X, @("size","age"))
# $inter.PrintSummary()
#
# --- Feature Binning ---
# 4. $data = Get-VBAFTreeDataset -Name "HousePrice" # from Trees module
# $binner = [FeatureBinner]::new(4, "quantile")
# $binner.Fit($data.X)
# $binner.PrintBins($data.Features)
# $Xbinned = $binner.Transform($data.X)
#
# --- Feature Correlations ---
# 5. Get-FeatureCorrelations -X $data.X -y $data.yRaw -featureNames $data.Features
#
# --- Variance Selection ---
# 6. $vs = [VarianceSelector]::new(0.5)
# $Xsel = $vs.FitTransform($data.X)
# $vs.PrintSummary($data.Features)
#
# --- PCA ---
# 7. $pca = [PCA]::new(2)
# $Xpca = $pca.FitTransform($data.X)
# $pca.PrintSummary()
# Write-Host "Shape: $($data.X[0].Length) features -> $($Xpca[0].Length) components"
#
# --- Full Pipeline ---
# 8. $pipe = [TransformerPipeline]::new()
# $pipe.Add("Imputer", [MissingValueImputer]::new("median")) # needs DataPipeline
# $pipe.Add("Scaler", [RobustScaler]::new()) # needs DataPipeline
# $pipe.Add("Poly", [PolynomialFeatures]::new(2))
# $pipe.PrintSteps()
# $Xout = $pipe.FitTransform($data.X)
# ============================================================
# Load banner: confirms the script parsed and lists the classes/functions
# it defines, plus a copy-pasteable quick-start example.
Write-Host "📦 VBAF.ML.FeatureEngineering.ps1 loaded" -ForegroundColor Green
Write-Host " Classes : PolynomialFeatures"              -ForegroundColor Cyan
Write-Host " InteractionFeatures"              -ForegroundColor Cyan
Write-Host " FeatureBinner"                    -ForegroundColor Cyan
Write-Host " VarianceSelector"                 -ForegroundColor Cyan
Write-Host " PCA"                              -ForegroundColor Cyan
Write-Host " TransformerPipeline"             -ForegroundColor Cyan
Write-Host " Functions : Get-Correlation"                -ForegroundColor Cyan
Write-Host " Get-FeatureCorrelations"         -ForegroundColor Cyan
Write-Host ""
Write-Host " Quick start:" -ForegroundColor Yellow
Write-Host ' $X = @(@(2.0,3.0),@(4.0,5.0),@(1.0,2.0))'     -ForegroundColor White
Write-Host ' $poly = [PolynomialFeatures]::new(2)'              -ForegroundColor White
Write-Host ' $Xp = $poly.FitTransform($X, @("size","age"))'   -ForegroundColor White
Write-Host ' $poly.PrintSummary()'                              -ForegroundColor White
Write-Host ""