# VBAF.ML.AutoML.ps1
#Requires -Version 5.1 <# .SYNOPSIS AutoML - Automated Machine Learning for VBAF .DESCRIPTION Implements AutoML workflows from scratch. Designed as a TEACHING resource - every step explained. Features included: - Hyperparameter optimization : Grid, Random, Bayesian search - Algorithm selection : auto-select best model type - Feature selection : auto-select best features - Pipeline automation : chain steps automatically Neural Architecture Search: skipped - requires repeated neural network backprop which is not reliable in PS 5.1 class methods. All other features are pure math/loops - fully PS 5.1 compatible! .NOTES Part of VBAF - Phase 7 Production Features - v2.1.0 PS 5.1 compatible Teaching project - AutoML concepts explained step by step! #> # ============================================================ # TEACHING NOTE: What is AutoML? # AutoML = Automated Machine Learning. # Instead of manually tuning hyperparameters, AutoML searches # the space of possible configurations automatically! # # Three main search strategies: # GRID SEARCH : try every combination (exhaustive, slow) # RANDOM SEARCH : try random combinations (faster, surprisingly good!) # BAYESIAN OPT : use past results to guide next try (smartest!) # # Bayesian optimization key insight: # After each trial, we build a model of "which configs are promising" # and sample from promising regions rather than randomly. # This converges much faster than random search! 
# ============================================================

# ============================================================
# CROSS-VALIDATION HELPER (shared by all search methods)
# ============================================================
# Runs k-fold cross-validation on a model object that exposes
# .Fit(X, y) and .Predict(X), and returns the MEAN fold score.
# All metrics are oriented "higher is better" (RMSE is negated)
# so every search function can simply maximize the result.
#
#   $Model  - any object with Fit(double[][], double[]) and
#             Predict(double[][]) methods (duck-typed)
#   $X      - feature rows
#   $y      - targets (integer class labels for "Accuracy")
#   $Folds  - requested number of folds (clamped to sample count)
#   $Metric - "R2", "RMSE" (negated) or "Accuracy"
#
# Returns 0.0 when no fold could be scored (too little data or
# the model failed on every fold).
function Invoke-AutoMLCrossVal {
    param(
        [object]     $Model,
        [double[][]] $X,
        [double[]]   $y,
        [int]        $Folds  = 5,
        [string]     $Metric = "R2"   # R2, RMSE, Accuracy
    )

    $n = $X.Length
    if ($n -lt 2) { return 0.0 }      # need at least one train + one val row

    # FIX: clamp folds to the sample count. Previously, when $n < $Folds,
    # $foldSz was 0, every fold was empty and skipped, and the function
    # silently returned 0.0 for ANY model.
    $effFolds = [Math]::Min($Folds, $n)
    $foldSz   = [int]([Math]::Floor($n / $effFolds))
    $scores   = @()

    for ($f = 0; $f -lt $effFolds; $f++) {
        $valStart = $f * $foldSz
        # FIX: the last fold absorbs the remainder rows so every sample is
        # used for validation exactly once (previously the tail rows were
        # always in the training split).
        $valEnd = if ($f -eq $effFolds - 1) { $n - 1 }
                  else { [Math]::Min($valStart + $foldSz - 1, $n - 1) }

        # Partition rows into train/validation for this fold.
        $trainX = @(); $trainY = @(); $valX = @(); $valY = @()
        for ($i = 0; $i -lt $n; $i++) {
            if ($i -ge $valStart -and $i -le $valEnd) {
                $valX += ,[double[]]$X[$i]; $valY += $y[$i]
            } else {
                $trainX += ,[double[]]$X[$i]; $trainY += $y[$i]
            }
        }
        if ($trainX.Count -eq 0 -or $valX.Count -eq 0) { continue }

        try {
            $Model.Fit($trainX, $trainY)
            $preds = $Model.Predict($valX)
            $score = switch ($Metric) {
                "R2" {
                    # R^2 = 1 - SSres/SStot; defined as 1.0 when the fold's
                    # target is constant (SStot == 0).
                    $mean  = ($valY | Measure-Object -Average).Average
                    $ssTot = ($valY | ForEach-Object { ($_ - $mean) * ($_ - $mean) } | Measure-Object -Sum).Sum
                    $ssRes = 0.0
                    for ($i = 0; $i -lt $valY.Count; $i++) {
                        $ssRes += ($valY[$i] - $preds[$i]) * ($valY[$i] - $preds[$i])
                    }
                    if ($ssTot -gt 0) { 1.0 - $ssRes / $ssTot } else { 1.0 }
                }
                "RMSE" {
                    $mse = 0.0
                    for ($i = 0; $i -lt $valY.Count; $i++) {
                        $mse += ($valY[$i] - $preds[$i]) * ($valY[$i] - $preds[$i])
                    }
                    -[Math]::Sqrt($mse / $valY.Count)   # negated so higher = better
                }
                "Accuracy" {
                    $correct = 0
                    for ($i = 0; $i -lt $valY.Count; $i++) {
                        if ([int]$preds[$i] -eq [int]$valY[$i]) { $correct++ }
                    }
                    $correct / $valY.Count
                }
                default { 0.0 }
            }
            $scores += $score
        } catch {
            # Best-effort by design: a model that fails on a fold simply
            # contributes no score; an all-fail config ends up at 0.0.
        }
    }

    if ($scores.Count -eq 0) { return 0.0 }
    return ($scores | Measure-Object -Average).Average
}

# ============================================================
# GRID SEARCH
# ============================================================
# TEACHING NOTE: Grid search = exhaustive search.
# You define a grid of hyperparameter values:
#   LearningRate: [0.001, 0.01, 0.1]
#   Lambda:       [0.0, 0.1, 1.0]
# Grid search tries ALL 3x3=9 combinations.
# Great for small grids, impractical for large ones!
# 10 params x 10 values each = 10^10 combinations (impossible!)
# ============================================================
# Exhaustively cross-validates every combination in $ParamGrid and
# returns @{ BestParams; BestScore; AllResults }.
function Invoke-VBAFGridSearch {
    param(
        [scriptblock] $ModelFactory,   # { param($params) return [Model]::new($params.Lambda) }
        [hashtable]   $ParamGrid,      # @{ Lambda=@(0.0,0.1,1.0); Lr=@(0.001,0.01) }
        [double[][]]  $X,
        [double[]]    $y,
        [int]         $Folds  = 5,
        [string]      $Metric = "R2"
    )

    # Build all combinations: cartesian product of the value lists,
    # grown one key at a time.
    $keys   = @($ParamGrid.Keys)
    $combos = @(@{})
    foreach ($key in $keys) {
        $newCombos = @()
        foreach ($existing in $combos) {
            foreach ($val in $ParamGrid[$key]) {
                $newCombo = @{}
                foreach ($k in $existing.Keys) { $newCombo[$k] = $existing[$k] }
                $newCombo[$key] = $val
                $newCombos += $newCombo
            }
        }
        $combos = $newCombos
    }

    Write-Host ""
    Write-Host ("🔲 Grid Search: {0} combinations x {1} folds = {2} fits" -f $combos.Count, $Folds, ($combos.Count * $Folds)) -ForegroundColor Green

    $results   = @()
    $best      = $null
    $bestScore = [double]::MinValue
    $trial     = 0
    foreach ($combo in $combos) {
        $trial++
        try {
            $model = & $ModelFactory $combo
            $score = Invoke-AutoMLCrossVal -Model $model -X $X -y $y -Folds $Folds -Metric $Metric
        } catch { $score = [double]::MinValue }

        $paramStr = ($combo.Keys | ForEach-Object { "{0}={1}" -f $_, $combo[$_] }) -join " "
        # FIX: accept the first trial unconditionally. Previously, if every
        # trial scored [double]::MinValue (factory/CV always threw), $best
        # stayed $null and "$best.Keys" below threw a null-reference error.
        $isBest = ($null -eq $best) -or ($score -gt $bestScore)
        if ($isBest) { $bestScore = $score; $best = $combo }
        $marker = if ($isBest) { " ★" } else { "" }
        $color  = if ($isBest) { "Green" } else { "DarkGray" }
        Write-Host (" [{0,3}/{1}] {2,-35} {3}={4:F4}{5}" -f $trial, $combos.Count, $paramStr, $Metric, $score, $marker) -ForegroundColor $color
        $results += @{ Params=$combo; Score=$score }
    }

    Write-Host ""
    Write-Host ("✅ Best: {0}={1:F4}" -f $Metric, $bestScore) -ForegroundColor Green
    $paramStr = ($best.Keys | ForEach-Object { "{0}={1}" -f $_, $best[$_] }) -join " "
    Write-Host (" Params: {0}" -f $paramStr) -ForegroundColor White
    Write-Host ""
    return @{ BestParams=$best; BestScore=$bestScore; AllResults=$results }
}

# ============================================================
# RANDOM SEARCH
# ============================================================
# TEACHING NOTE: Random search = sample random combinations.
# Key insight from Bergstra & Bengio (2012):
#   Random search finds good configs faster than grid search
#   because not all hyperparameters matter equally!
#   If only 2 of 10 params matter, grid search wastes time
#   on the unimportant 8. Random search covers all 10 better
#   with the same number of trials.
# ============================================================
# Samples $NTrials random configurations from $ParamSpace (seeded,
# so runs are reproducible) and returns @{ BestParams; BestScore;
# AllResults }.
function Invoke-VBAFRandomSearch {
    param(
        [scriptblock] $ModelFactory,
        [hashtable]   $ParamSpace,     # @{ Lambda=@(0.0,0.1,1.0); Lr=@(0.001,0.01,0.1) }
        [double[][]]  $X,
        [double[]]    $y,
        [int]         $NTrials = 20,
        [int]         $Folds   = 5,
        [string]      $Metric  = "R2",
        [int]         $Seed    = 42
    )

    $rng  = [System.Random]::new($Seed)   # fixed seed -> reproducible sampling
    $keys = @($ParamSpace.Keys)

    Write-Host ""
    Write-Host ("🎲 Random Search: {0} trials x {1} folds = {2} fits" -f $NTrials, $Folds, ($NTrials * $Folds)) -ForegroundColor Green

    $results   = @()
    $best      = $null
    $bestScore = [double]::MinValue
    for ($trial = 1; $trial -le $NTrials; $trial++) {
        # Sample a random config: one uniformly-chosen value per parameter.
        $combo = @{}
        foreach ($key in $keys) {
            $vals = $ParamSpace[$key]
            $combo[$key] = $vals[$rng.Next(0, $vals.Count)]
        }
        try {
            $model = & $ModelFactory $combo
            $score = Invoke-AutoMLCrossVal -Model $model -X $X -y $y -Folds $Folds -Metric $Metric
        } catch { $score = [double]::MinValue }

        $paramStr = ($combo.Keys | ForEach-Object { "{0}={1}" -f $_, $combo[$_] }) -join " "
        # FIX: same $null-best guard as grid search (see above).
        $isBest = ($null -eq $best) -or ($score -gt $bestScore)
        if ($isBest) { $bestScore = $score; $best = $combo }
        $marker = if ($isBest) { " ★" } else { "" }
        $color  = if ($isBest) { "Green" } else { "DarkGray" }
        Write-Host (" [{0,3}/{1}] {2,-35} {3}={4:F4}{5}" -f $trial, $NTrials, $paramStr, $Metric, $score, $marker) -ForegroundColor $color
        $results += @{ Params=$combo; Score=$score }
    }

    Write-Host ""
    Write-Host ("✅ Best: {0}={1:F4}" -f $Metric, $bestScore) -ForegroundColor Green
    $paramStr = ($best.Keys | ForEach-Object { "{0}={1}" -f $_, $best[$_] }) -join " "
    Write-Host (" Params: {0}" -f $paramStr) -ForegroundColor White
    Write-Host ""
    return @{ BestParams=$best; BestScore=$bestScore; AllResults=$results }
}

# ============================================================
# BAYESIAN OPTIMIZATION
# ============================================================
# TEACHING NOTE: Bayesian optimization is much smarter!
# It maintains a SURROGATE MODEL of the objective function
# (approximating "what score will I get for this config?")
# and uses an ACQUISITION FUNCTION to pick the next config.
#
# Our simplified version:
#   1. Run a few random trials to warm up
#   2. Fit a simple surrogate: weighted average of past results
#   3. Use Upper Confidence Bound (UCB) acquisition:
#        score_estimate + exploration_bonus
#   4. Pick config with highest UCB, evaluate it
#   5. Update surrogate and repeat
#
# This balances EXPLOITATION (try configs near good ones)
# and EXPLORATION (try configs we haven't seen yet)!
# ============================================================
# Simplified Bayesian optimization over a discrete parameter space.
# Warm-up trials are random; afterwards each trial picks the candidate
# maximizing UCB = surrogate_mean + Kappa * uncertainty, where the
# surrogate is a Gaussian-kernel weighted mean of past scores and the
# uncertainty is the distance to the nearest observed point.
# Returns @{ BestParams; BestScore; History }.
function Invoke-VBAFBayesianSearch {
    param(
        [scriptblock] $ModelFactory,
        [hashtable]   $ParamSpace,
        [double[][]]  $X,
        [double[]]    $y,
        [int]         $NTrials      = 20,
        [int]         $WarmupTrials = 5,    # random trials before Bayesian kicks in
        [int]         $Folds        = 5,
        [string]      $Metric       = "R2",
        [double]      $Kappa        = 2.0,  # exploration weight in UCB
        [int]         $Seed         = 42
    )

    $rng      = [System.Random]::new($Seed)
    $keys     = @($ParamSpace.Keys)
    $history  = @()                          # @{ Params=@{}; Score=double; Vector=double[] }
    $best     = $null
    $bestScore= [double]::MinValue

    Write-Host ""
    Write-Host ("🧠 Bayesian Search: {0} trials ({1} warmup) x {2} folds" -f $NTrials, $WarmupTrials, $Folds) -ForegroundColor Green
    Write-Host (" Kappa (exploration)={0}" -f $Kappa) -ForegroundColor DarkGray

    # Candidate pool: full cartesian product of the parameter space.
    $allCombos = @(@{})
    foreach ($key in $keys) {
        $newCombos = @()
        foreach ($existing in $allCombos) {
            foreach ($val in $ParamSpace[$key]) {
                $newCombo = @{}
                foreach ($k in $existing.Keys) { $newCombo[$k] = $existing[$k] }
                $newCombo[$key] = $val
                $newCombos += $newCombo
            }
        }
        $allCombos = $newCombos
    }

    # Encode a combo as a numeric vector (per-key value index, scaled to
    # [0,1]) so the surrogate can measure distances between configs.
    function Get-ComboVector {
        param([hashtable]$combo)
        $vec = @()
        foreach ($key in $keys) {
            $vals = $ParamSpace[$key]
            $idx  = [array]::IndexOf($vals, $combo[$key])
            $vec += [double]$idx / [Math]::Max(1, $vals.Count - 1)
        }
        return $vec
    }

    for ($trial = 1; $trial -le $NTrials; $trial++) {
        $isWarmup = $trial -le $WarmupTrials
        $combo    = $null
        if ($isWarmup -or $history.Count -lt 2) {
            # Random warmup: sample uniformly, like random search.
            $combo = @{}
            foreach ($key in $keys) {
                $vals = $ParamSpace[$key]
                $combo[$key] = $vals[$rng.Next(0, $vals.Count)]
            }
        } else {
            # Bayesian step: score every candidate with UCB and take the max.
            $bestUCB   = [double]::MinValue
            $bestCombo = $allCombos[0]
            foreach ($cand in $allCombos) {
                $candVec = Get-ComboVector $cand

                # Surrogate mean: Gaussian-kernel weighted mean of past
                # scores (closer observed configs weigh more).
                $weightedSum = 0.0; $totalWeight = 0.0
                foreach ($h in $history) {
                    $dist = 0.0
                    for ($d = 0; $d -lt $candVec.Length; $d++) {
                        $diff = $candVec[$d] - $h.Vector[$d]
                        $dist += $diff * $diff
                    }
                    $w = [Math]::Exp(-5.0 * $dist)
                    $weightedSum += $w * $h.Score
                    $totalWeight += $w
                }
                $mu = if ($totalWeight -gt 0) { $weightedSum / $totalWeight } else { 0.0 }

                # Uncertainty: distance from the nearest observed point
                # (0 for configs already tried -> pure exploitation there).
                $minDist = [double]::MaxValue
                foreach ($h in $history) {
                    $dist = 0.0
                    for ($d = 0; $d -lt $candVec.Length; $d++) {
                        $diff = $candVec[$d] - $h.Vector[$d]
                        $dist += $diff * $diff
                    }
                    if ($dist -lt $minDist) { $minDist = $dist }
                }
                $sigma = [Math]::Sqrt($minDist)

                $ucb = $mu + $Kappa * $sigma
                if ($ucb -gt $bestUCB) { $bestUCB = $ucb; $bestCombo = $cand }
            }
            $combo = $bestCombo
        }

        try {
            $model = & $ModelFactory $combo
            $score = Invoke-AutoMLCrossVal -Model $model -X $X -y $y -Folds $Folds -Metric $Metric
        } catch { $score = [double]::MinValue }

        $vec      = Get-ComboVector $combo
        $history += @{ Params=$combo; Score=$score; Vector=$vec }

        # FIX: accept the first trial unconditionally so $best can never
        # remain $null when every trial fails (previously "$best.Keys"
        # below threw on $null).
        $isBest = ($null -eq $best) -or ($score -gt $bestScore)
        if ($isBest) { $bestScore = $score; $best = $combo }
        $marker = if ($isBest) { " ★" } else { "" }
        $mode   = if ($isWarmup) { "warmup" } else { "bayes " }
        $color  = if ($isBest) { "Green" } elseif ($isWarmup) { "DarkGray" } else { "White" }
        $paramStr = ($combo.Keys | ForEach-Object { "{0}={1}" -f $_, $combo[$_] }) -join " "
        Write-Host (" [{0,3}/{1}] [{2}] {3,-30} {4}={5:F4}{6}" -f $trial, $NTrials, $mode, $paramStr, $Metric, $score, $marker) -ForegroundColor $color
    }

    Write-Host ""
    Write-Host ("✅ Best: {0}={1:F4}" -f $Metric, $bestScore) -ForegroundColor Green
    $paramStr = ($best.Keys | ForEach-Object { "{0}={1}" -f $_, $best[$_] }) -join " "
    Write-Host (" Params: {0}" -f $paramStr) -ForegroundColor White
    Write-Host ""
    return @{ BestParams=$best; BestScore=$bestScore; History=$history }
}

# ============================================================
# ALGORITHM SELECTION
# ============================================================
# TEACHING NOTE: Why try multiple algorithms?
# No Free Lunch theorem: no single algorithm is best for all problems!
# AutoML tests multiple candidates and picks the winner.
#
# For regression:     Linear, Ridge, Lasso
# For classification: Logistic, GaussianNB, DecisionTree
# ============================================================
# Cross-validates a fixed roster of candidate algorithms for the given
# task and returns @{ BestAlgorithm; BestScore; AllResults }.
function Invoke-VBAFAlgorithmSelection {
    param(
        [double[][]] $X,
        [double[]]   $y,
        [string]     $Task   = "regression",   # regression, classification
        [int]        $Folds  = 5,
        [string]     $Metric = "R2"
    )

    $candidates = if ($Task -eq "regression") {
        @(
            @{ Name="LinearRegression";   Factory={ [LinearRegression]::new() } },
            @{ Name="RidgeRegression_01"; Factory={ [RidgeRegression]::new(0.1) } },
            @{ Name="RidgeRegression_1";  Factory={ [RidgeRegression]::new(1.0) } },
            @{ Name="LassoRegression_01"; Factory={ [LassoRegression]::new(0.1) } },
            @{ Name="DecisionTree_d3";    Factory={ [DecisionTree]::new("regression", 3, 2) } },
            @{ Name="DecisionTree_d5";    Factory={ [DecisionTree]::new("regression", 5, 2) } }
        )
    } else {
        @(
            @{ Name="LogisticRegression"; Factory={ [LogisticRegression]::new() } },
            @{ Name="GaussianNaiveBayes"; Factory={ [GaussianNaiveBayes]::new() } },
            @{ Name="DecisionTree_d3";    Factory={ [DecisionTree]::new("classification", 3, 2) } },
            @{ Name="DecisionTree_d5";    Factory={ [DecisionTree]::new("classification", 5, 2) } }
        )
    }

    Write-Host ""
    Write-Host ("🤖 Algorithm Selection: {0} task, {1} candidates" -f $Task, $candidates.Count) -ForegroundColor Green
    Write-Host (" {0,-25} {1,10} {2}" -f "Algorithm", $Metric, "Bar") -ForegroundColor Yellow
    Write-Host (" {0}" -f ("-" * 55)) -ForegroundColor DarkGray

    $results   = @()
    $best      = $null
    $bestScore = [double]::MinValue
    foreach ($cand in $candidates) {
        try {
            $model = & $cand.Factory
            $score = Invoke-AutoMLCrossVal -Model $model -X $X -y $y -Folds $Folds -Metric $Metric
        } catch { $score = [double]::MinValue }

        # FIX: accept the first candidate unconditionally so $best is never
        # $null even when every candidate fails to fit.
        $isBest = ($null -eq $best) -or ($score -gt $bestScore)
        if ($isBest) { $bestScore = $score; $best = $cand.Name }
        $bar    = "█" * [int]([Math]::Max(0, $score * 20))
        $marker = if ($isBest) { " ★" } else { "" }
        $color  = if ($isBest) { "Green" } else { "White" }
        Write-Host (" {0,-25} {1,10:F4} {2}{3}" -f $cand.Name, $score, $bar, $marker) -ForegroundColor $color
        $results += @{ Name=$cand.Name; Score=$score }
    }

    Write-Host ""
    Write-Host ("✅ Best algorithm: {0} ({1}={2:F4})" -f $best, $Metric, $bestScore) -ForegroundColor Green
    Write-Host ""
    return @{ BestAlgorithm=$best; BestScore=$bestScore; AllResults=$results }
}

# ============================================================
# FEATURE SELECTION
# ============================================================
# TEACHING NOTE: Feature selection = automatically remove
# irrelevant or redundant features.
#
# Methods:
#   Filter  : rank features by correlation with target (fast, model-agnostic)
#   Wrapper : try subsets, pick best (slow, model-aware)
#   RFE     : Recursive Feature Elimination - remove weakest one by one
#
# We implement Filter and a simple greedy forward selection.
# ============================================================
# Selects up to $MaxFeatures columns of $X, either by absolute Pearson
# correlation with $y ("filter") or greedy CV-scored forward selection
# ("forward"). Returns @{ SelectedIndices; SelectedNames; X } where X is
# the reduced dataset.
function Invoke-VBAFFeatureSelection {
    param(
        [double[][]] $X,
        [double[]]   $y,
        [string[]]   $FeatureNames = @(),
        [int]        $MaxFeatures  = -1,       # -1 = auto (70% of features)
        [string]     $Method       = "filter", # filter, forward
        [int]        $Folds        = 5
    )

    $nFeatures = $X[0].Length
    if ($FeatureNames.Length -eq 0) {
        $FeatureNames = 0..($nFeatures-1) | ForEach-Object { "f$_" }
    }
    if ($MaxFeatures -le 0) {
        $MaxFeatures = [Math]::Max(1, [int]($nFeatures * 0.7))
    }

    Write-Host ""
    Write-Host ("🔍 Feature Selection: {0} method, {1} -> max {2} features" -f $Method, $nFeatures, $MaxFeatures) -ForegroundColor Green

    if ($Method -eq "filter") {
        # Correlation-based filter: rank features by |Pearson r| with y.
        $scores = @()
        for ($f = 0; $f -lt $nFeatures; $f++) {
            $xVals = $X | ForEach-Object { [double]$_[$f] }
            $xMean = ($xVals | Measure-Object -Average).Average
            $yMean = ($y | Measure-Object -Average).Average
            $num = 0.0; $dx2 = 0.0; $dy2 = 0.0
            for ($i = 0; $i -lt $y.Length; $i++) {
                $num += ($xVals[$i] - $xMean) * ($y[$i] - $yMean)
                $dx2 += ($xVals[$i] - $xMean) * ($xVals[$i] - $xMean)
                $dy2 += ($y[$i] - $yMean) * ($y[$i] - $yMean)
            }
            # Zero-variance feature or constant target -> correlation 0.
            $corr = if ($dx2 -gt 0 -and $dy2 -gt 0) {
                [Math]::Abs($num / [Math]::Sqrt($dx2 * $dy2))
            } else { 0.0 }
            $scores += @{ Index=$f; Name=$FeatureNames[$f]; Correlation=$corr }
        }
        $ranked   = $scores | Sort-Object { $_.Correlation } -Descending
        $selected = @($ranked | Select-Object -First $MaxFeatures)

        Write-Host (" {0,-15} {1,10} {2}" -f "Feature", "|Corr|", "Bar") -ForegroundColor Yellow
        Write-Host (" {0}" -f ("-" * 40)) -ForegroundColor DarkGray
        foreach ($s in $ranked) {
            $bar    = "█" * [int]($s.Correlation * 20)
            $kept   = $selected | Where-Object { $_.Index -eq $s.Index }
            $marker = if ($null -ne $kept) { " ✅" } else { " ❌" }
            $color  = if ($null -ne $kept) { "Green" } else { "DarkGray" }
            Write-Host (" {0,-15} {1,10:F4} {2}{3}" -f $s.Name, $s.Correlation, $bar, $marker) -ForegroundColor $color
        }
        $selectedIdx = @($selected | ForEach-Object { $_.Index } | Sort-Object)
    }
    elseif ($Method -eq "forward") {
        # Greedy forward selection: repeatedly add the feature that most
        # improves cross-validated R2 of a linear model; stop when adding
        # any remaining feature no longer helps.
        $remaining   = @(0..($nFeatures-1))
        $selectedIdx = @()
        $bestOverall = [double]::MinValue
        Write-Host " Greedy forward selection:" -ForegroundColor DarkGray
        while ($selectedIdx.Length -lt $MaxFeatures -and $remaining.Length -gt 0) {
            $bestScore   = [double]::MinValue
            $bestFeature = -1
            foreach ($f in $remaining) {
                $trySet = @($selectedIdx) + @($f)
                $Xsub   = @($X | ForEach-Object { $row = $_; ,([double[]]($trySet | ForEach-Object { $row[$_] })) })
                $yArr   = [double[]]$y
                try {
                    $model = [LinearRegression]::new()
                    $score = Invoke-AutoMLCrossVal -Model $model -X $Xsub -y $yArr -Folds $Folds -Metric "R2"
                } catch { $score = [double]::MinValue }
                if ($score -gt $bestScore) { $bestScore = $score; $bestFeature = $f }
            }
            if ($bestFeature -ge 0 -and $bestScore -gt $bestOverall - 0.001) {
                $selectedIdx += $bestFeature
                $remaining    = @($remaining | Where-Object { $_ -ne $bestFeature })
                $bestOverall  = $bestScore
                # FIX: the -join must be INSIDE the parenthesized argument.
                # "-f a, b, ($list) -join ','" parses as
                # (("fmt" -f a, b, $list) -join ',') because -f binds first,
                # so {2} received the raw array and printed "System.Object[]".
                Write-Host (" + {0,-15} R2={1:F4} (set: {2})" -f $FeatureNames[$bestFeature], $bestScore, (($selectedIdx | ForEach-Object { $FeatureNames[$_] }) -join ",")) -ForegroundColor Green
            } else {
                break   # adding more features doesn't help
            }
        }
    }

    Write-Host ""
    # FIX: same -join precedence bug as above — previously printed
    # "System.Object[]" instead of the comma-separated feature names.
    Write-Host ("✅ Selected {0}/{1} features: {2}" -f $selectedIdx.Length, $nFeatures, (($selectedIdx | ForEach-Object { $FeatureNames[$_] }) -join ", ")) -ForegroundColor Green

    # Return the reduced dataset (only the selected columns, in index order).
    $Xreduced = @($X | ForEach-Object { $row = $_; ,([double[]]($selectedIdx | ForEach-Object { $row[$_] })) })
    Write-Host ""
    return @{
        SelectedIndices = $selectedIdx
        SelectedNames   = ($selectedIdx | ForEach-Object { $FeatureNames[$_] })
        X               = $Xreduced
    }
}

# ============================================================
# PIPELINE AUTOMATION
# ============================================================
# TEACHING NOTE: An AutoML pipeline chains:
#   1. Data preprocessing (imputation, scaling)
#   2. Feature selection
#   3. Algorithm selection
#   4. Hyperparameter optimization
#   5. Final model training + evaluation
#
# This is what tools like Auto-sklearn and H2O AutoML do!
# ============================================================
# End-to-end pipeline: feature selection -> algorithm selection -> HPO
# ("grid", "random" or "bayesian") -> final fit on all data.
# Returns the trained model plus everything the pipeline decided.
function Invoke-VBAFAutoML {
    param(
        [double[][]] $X,
        [double[]]   $y,
        [string[]]   $FeatureNames = @(),
        [string]     $Task      = "regression",
        [string]     $OptMethod = "random",    # grid, random, bayesian
        [int]        $HPOTrials = 15,
        [int]        $Folds     = 5,
        [string]     $Metric    = "R2"
    )

    Write-Host ""
    Write-Host "╔══════════════════════════════════════════╗" -ForegroundColor Cyan
    Write-Host "║ VBAF AutoML Pipeline ║" -ForegroundColor Cyan
    Write-Host ("║ Task: {0,-35}║" -f $Task) -ForegroundColor White
    Write-Host ("║ HPO: {0,-35}║" -f $OptMethod) -ForegroundColor White
    Write-Host ("║ Metric: {0,-33}║" -f $Metric) -ForegroundColor White
    Write-Host "╚══════════════════════════════════════════╝" -ForegroundColor Cyan

    $sw = [System.Diagnostics.Stopwatch]::StartNew()

    # Step 1: Feature selection (fast correlation filter).
    Write-Host "`n[Step 1/3] Feature Selection" -ForegroundColor Yellow
    $fsResult    = Invoke-VBAFFeatureSelection -X $X -y $y -FeatureNames $FeatureNames -Method "filter" -Folds $Folds
    $Xselected   = $fsResult.X
    $selectedIdx = $fsResult.SelectedIndices

    # Step 2: Algorithm selection on the reduced features.
    Write-Host "[Step 2/3] Algorithm Selection" -ForegroundColor Yellow
    $algoResult = Invoke-VBAFAlgorithmSelection -X $Xselected -y $y -Task $Task -Folds $Folds -Metric $Metric
    $bestAlgo   = $algoResult.BestAlgorithm

    # Step 3: Hyperparameter optimization for the winning algorithm.
    Write-Host "[Step 3/3] Hyperparameter Optimization ($bestAlgo)" -ForegroundColor Yellow
    $paramSpace = switch -Wildcard ($bestAlgo) {
        "Ridge*"    { @{ Lambda=@(0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0) } }
        "Lasso*"    { @{ Lambda=@(0.001, 0.01, 0.1, 0.5, 1.0) } }
        "Decision*" { @{ MaxDepth=@(2,3,4,5,6); MinSamples=@(1,2,3) } }
        default     { @{ Dummy=@(1) } }   # linear/logistic/NB have no hyperparams
    }
    $modelFactory = switch -Wildcard ($bestAlgo) {
        "LinearRegression"   { { param($p) [LinearRegression]::new() } }
        "Ridge*"             { { param($p) [RidgeRegression]::new($p.Lambda) } }
        "Lasso*"             { { param($p) [LassoRegression]::new($p.Lambda) } }
        "DecisionTree*"      { { param($p) [DecisionTree]::new($Task, [int]$p.MaxDepth, [int]$p.MinSamples) } }
        "GaussianNaiveBayes" { { param($p) [GaussianNaiveBayes]::new() } }
        "LogisticRegression" { { param($p) [LogisticRegression]::new() } }
        default              { { param($p) [LinearRegression]::new() } }
    }
    $hpoResult = switch ($OptMethod) {
        "grid"     { Invoke-VBAFGridSearch     -ModelFactory $modelFactory -ParamGrid  $paramSpace -X $Xselected -y $y -Folds $Folds -Metric $Metric }
        "bayesian" { Invoke-VBAFBayesianSearch -ModelFactory $modelFactory -ParamSpace $paramSpace -X $Xselected -y $y -NTrials $HPOTrials -Folds $Folds -Metric $Metric }
        default    { Invoke-VBAFRandomSearch   -ModelFactory $modelFactory -ParamSpace $paramSpace -X $Xselected -y $y -NTrials $HPOTrials -Folds $Folds -Metric $Metric }
    }
    $sw.Stop()

    # Train the final model on ALL data with the best hyperparameters.
    $finalModel = & $modelFactory $hpoResult.BestParams
    $finalModel.Fit($Xselected, $y)

    Write-Host ""
    Write-Host "╔══════════════════════════════════════════╗" -ForegroundColor Green
    Write-Host "║ AutoML Results ║" -ForegroundColor Green
    Write-Host ("║ Best algorithm : {0,-23}║" -f $bestAlgo) -ForegroundColor White
    Write-Host ("║ Features used : {0,-23}║" -f $selectedIdx.Length) -ForegroundColor White
    Write-Host ("║ Best {0,-7} : {1,-23:F4}║" -f $Metric, $hpoResult.BestScore) -ForegroundColor White
    Write-Host ("║ Total time : {0,-23}║" -f ("{0:F1}s" -f $sw.Elapsed.TotalSeconds)) -ForegroundColor White
    Write-Host "╚══════════════════════════════════════════╝" -ForegroundColor Green
    Write-Host ""

    return @{
        Model            = $finalModel
        Algorithm        = $bestAlgo
        BestParams       = $hpoResult.BestParams
        BestScore        = $hpoResult.BestScore
        SelectedFeatures = $fsResult.SelectedNames
        SelectedIndices  = $selectedIdx
        TotalSeconds     = $sw.Elapsed.TotalSeconds
    }
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- Grid Search ---
# 2. $data = Get-VBAFDataset -Name "HousePrice"
#    $scaler = [StandardScaler]::new()
#    $Xs = $scaler.FitTransform($data.X)
#    $result = Invoke-VBAFGridSearch `
#        -ModelFactory { param($p) [RidgeRegression]::new($p.Lambda) } `
#        -ParamGrid @{ Lambda=@(0.001, 0.01, 0.1, 1.0, 10.0) } `
#        -X $Xs -y $data.y -Metric "R2"
#    Write-Host "Best Lambda: $($result.BestParams.Lambda)"
#
# --- Random Search ---
# 3. $result2 = Invoke-VBAFRandomSearch `
#        -ModelFactory { param($p) [RidgeRegression]::new($p.Lambda) } `
#        -ParamSpace @{ Lambda=@(0.001,0.01,0.1,0.5,1.0,5.0,10.0) } `
#        -X $Xs -y $data.y -NTrials 10 -Metric "R2"
#
# --- Bayesian Search ---
# 4. $result3 = Invoke-VBAFBayesianSearch `
#        -ModelFactory { param($p) [RidgeRegression]::new($p.Lambda) } `
#        -ParamSpace @{ Lambda=@(0.001,0.01,0.1,0.5,1.0,5.0,10.0) } `
#        -X $Xs -y $data.y -NTrials 15 -WarmupTrials 5 -Metric "R2"
#
# --- Algorithm Selection ---
# 5. Invoke-VBAFAlgorithmSelection -X $Xs -y $data.y -Task "regression"
#
# --- Feature Selection ---
# 6. Invoke-VBAFFeatureSelection -X $Xs -y $data.y `
#        -FeatureNames @("size_sqm","bedrooms","age_years") -Method "filter"
#
# --- Full AutoML Pipeline ---
# 7. $auto = Invoke-VBAFAutoML -X $Xs -y $data.y `
#        -FeatureNames @("size_sqm","bedrooms","age_years") `
#        -Task "regression" -OptMethod "bayesian" -HPOTrials 15
#    Write-Host "Best model: $($auto.Algorithm) R2=$($auto.BestScore)"
# ============================================================
Write-Host "📦 VBAF.ML.AutoML.ps1 loaded [v2.1.0 🤖]" -ForegroundColor Green
Write-Host " HPO : Invoke-VBAFGridSearch" -ForegroundColor Cyan
Write-Host " Invoke-VBAFRandomSearch" -ForegroundColor Cyan
Write-Host " Invoke-VBAFBayesianSearch" -ForegroundColor Cyan
Write-Host " Selection : Invoke-VBAFAlgorithmSelection" -ForegroundColor Cyan
Write-Host " Invoke-VBAFFeatureSelection" -ForegroundColor Cyan
Write-Host " Pipeline : Invoke-VBAFAutoML" -ForegroundColor Cyan
Write-Host " Note: NAS skipped - neural net backprop not reliable in PS 5.1" -ForegroundColor DarkGray
Write-Host ""
Write-Host " Quick start:" -ForegroundColor Yellow
Write-Host ' $data = Get-VBAFDataset -Name "HousePrice"' -ForegroundColor White
Write-Host ' $scaler = [StandardScaler]::new()' -ForegroundColor White
Write-Host ' $Xs = $scaler.FitTransform($data.X)' -ForegroundColor White
Write-Host ' $auto = Invoke-VBAFAutoML -X $Xs -y $data.y -FeatureNames @("size_sqm","bedrooms","age_years") -OptMethod "bayesian"' -ForegroundColor White
Write-Host ""