VBAF

5.0.0

VBAF.Benchmark.ps1

                                #Requires -Version 5.1

<#

.SYNOPSIS

    VBAF Benchmark Module -- Invoke-VBAFAgentBenchmark and Invoke-VBAFQuickBenchmark

.DESCRIPTION

    Compare multiple RL agents side by side on the same environment.

    Outputs a formatted comparison table and optional CSV export.

.NOTES

    Part of VBAF (Visual AI & Reinforcement Learning Framework)

    Phase 6 -- benchmark module.

    ASCII only -- no Unicode, no emoji, no box-drawing characters.

    Requires: VBAF.LoadAll.ps1

#>

function Invoke-VBAFSingleAgentBenchmark {
    <#
    .SYNOPSIS
        Trains one agent for a fixed number of episodes and summarizes its rewards.
    .DESCRIPTION
        Dispatches on -AgentName (switch matching is case-insensitive):
          DQN       -- builds a DQNAgent and steps -Environment directly, at most
                       500 steps per episode, calling Replay on every 4th step.
          PPO, A3C  -- delegate to Invoke-PPOTraining / Invoke-A3CTraining and read
                       EpisodeRewards from the last object those commands return.
                       NOTE: -Environment is not passed to these trainers here.
          QLearning -- builds a QLearningAgent keyed on the state vector rounded to
                       one decimal and joined with "|", at most 200 steps per episode.
        Any other name emits a warning and produces the all-zero result.
    .PARAMETER AgentName
        Agent to benchmark: DQN, PPO, A3C or QLearning.
    .PARAMETER Environment
        Object used through ObservationSpace.Size, ActionSpace.Size, Reset() and
        Step(action); Step results are read through NextState, Reward and Done.
    .PARAMETER Episodes
        Number of training episodes.
    .PARAMETER Silent
        Suppresses the "Running ..." progress line.
    .OUTPUTS
        Hashtable with keys Agent, Episodes, Rewards, Mean, Best, Worst, First10Avg,
        Last10Avg, Improvement and TimeSeconds. Improvement is the percent change of
        the last-10 average relative to the absolute first-10 average (0 when the
        first-10 average is 0). All statistics are 0 when no rewards were collected.
    #>
    param(
        [string]$AgentName,
        [object]$Environment,
        [int]$Episodes,
        [switch]$Silent
    )

    $rewards   = [System.Collections.Generic.List[double]]::new()
    $startTime = Get-Date

    if (-not $Silent) {
        Write-Host "    Running $AgentName for $Episodes episodes..." -ForegroundColor DarkGray
    }

    # Every agent method call below is assigned to $null: anything a method returns
    # would otherwise be written to this function's output stream and the caller
    # would receive an array instead of the single result hashtable.
    switch ($AgentName) {
        "DQN" {
            $config              = [DQNConfig]::new()
            $config.StateSize    = $Environment.ObservationSpace.Size
            $config.ActionSize   = $Environment.ActionSpace.Size
            $config.EpsilonDecay = 0.9995
            $config.EpsilonMin   = 0.05

            # Layer sizes: input, two hidden layers of 24, output.
            [int[]] $arch = @($Environment.ObservationSpace.Size, 24, 24, $Environment.ActionSpace.Size)
            $main   = [NeuralNetwork]::new($arch, $config.LearningRate)
            $target = [NeuralNetwork]::new($arch, $config.LearningRate)
            $memory = [ExperienceReplay]::new($config.MemorySize)
            $agent  = [DQNAgent]::new($config, $main, $target, $memory)

            for ($ep = 1; $ep -le $Episodes; $ep++) {
                [double[]] $state = $Environment.Reset()
                $epReward = 0.0
                $step     = 0
                $stepDone = $false

                while (-not $stepDone -and $step -lt 500) {
                    $action          = $agent.Act($state)
                    $sr              = $Environment.Step($action)
                    [double[]] $next = $sr.NextState

                    $null = $agent.Remember($state, $action, $sr.Reward, $next, $sr.Done)
                    if ($step % 4 -eq 0) { $null = $agent.Replay() }

                    $stepDone  = $sr.Done
                    $state     = $next
                    $epReward += $sr.Reward
                    $step++
                }

                $null = $agent.EndEpisode($epReward)
                $rewards.Add($epReward)
            }
        }

        "PPO" {
            # -PrintEvery is set past the last episode; presumably this silences the
            # trainer's own progress output -- confirm against Invoke-PPOTraining.
            $results = Invoke-PPOTraining -Episodes $Episodes -PrintEvery ($Episodes + 1) -FastMode
            if ($results -and $results[-1].PSObject.Properties['EpisodeRewards']) {
                foreach ($r in $results[-1].EpisodeRewards) { $rewards.Add([double]$r) }
            }
        }

        "A3C" {
            $results = Invoke-A3CTraining -Episodes $Episodes -PrintEvery ($Episodes + 1) -FastMode
            if ($results -and $results[-1].PSObject.Properties['EpisodeRewards']) {
                foreach ($r in $results[-1].EpisodeRewards) { $rewards.Add([double]$r) }
            }
        }

        "QLearning" {
            # Actions are named by their index rendered as a string ("0", "1", ...).
            $actionNames = @(0..($Environment.ActionSpace.Size - 1) | ForEach-Object { "$_" })
            $agent       = [QLearningAgent]::new($actionNames)

            for ($ep = 1; $ep -le $Episodes; $ep++) {
                [double[]] $stateArr = $Environment.Reset()
                $stateStr = ($stateArr | ForEach-Object { [Math]::Round($_, 1) }) -join "|"
                $epReward = 0.0
                $step     = 0
                $stepDone = $false

                while (-not $stepDone -and $step -lt 200) {
                    $action             = [int]$agent.ChooseAction($stateStr)
                    $sr                 = $Environment.Step($action)
                    [double[]] $nextArr = $sr.NextState
                    $nextStr            = ($nextArr | ForEach-Object { [Math]::Round($_, 1) }) -join "|"

                    $null = $agent.Learn($stateStr, "$action", $sr.Reward, $nextStr)

                    $stateStr  = $nextStr
                    $epReward += $sr.Reward
                    $stepDone  = $sr.Done
                    $step++
                }

                $null = $agent.EndEpisode($epReward)
                $rewards.Add($epReward)
            }
        }

        default {
            # Without this branch a mistyped agent name silently returned all zeros.
            Write-Warning "Unknown agent '$AgentName'. Supported agents: DQN, PPO, A3C, QLearning."
        }
    }

    $elapsed = (Get-Date) - $startTime

    if ($rewards.Count -eq 0) {
        return @{
            Agent = $AgentName; Episodes = $Episodes; Rewards = @()
            Mean = 0.0; Best = 0.0; Worst = 0.0
            First10Avg = 0.0; Last10Avg = 0.0; Improvement = 0.0
            TimeSeconds = $elapsed.TotalSeconds
        }
    }

    $rewardArr = $rewards.ToArray()
    $stats     = $rewardArr | Measure-Object -Average -Maximum -Minimum

    # Leading / trailing windows of up to 10 episodes (they overlap for short runs).
    $first10     = $rewardArr[0..([Math]::Min(9, $rewardArr.Count - 1))]
    $last10start = [Math]::Max(0, $rewardArr.Count - 10)
    $last10      = $rewardArr[$last10start..($rewardArr.Count - 1)]

    $first10Avg = ($first10 | Measure-Object -Average).Average
    $last10Avg  = ($last10  | Measure-Object -Average).Average

    $improvement = if ($first10Avg -ne 0) {
        ($last10Avg - $first10Avg) / [Math]::Abs($first10Avg) * 100
    } else {
        0.0
    }

    return @{
        Agent       = $AgentName
        Episodes    = $Episodes
        Rewards     = $rewardArr
        Mean        = $stats.Average
        Best        = $stats.Maximum
        Worst       = $stats.Minimum
        First10Avg  = $first10Avg
        Last10Avg   = $last10Avg
        Improvement = $improvement
        TimeSeconds = $elapsed.TotalSeconds
    }
}

function Invoke-VBAFAgentBenchmark {
    <#
    .SYNOPSIS
        Benchmarks several agents on one environment and prints a ranked table.
    .DESCRIPTION
        For every name in -Agents, calls Invoke-VBAFSingleAgentBenchmark -Runs times,
        prints per-run statistics, then prints a summary table ranked by the average
        of the per-run mean rewards. Optionally writes one CSV row per agent and run.
    .PARAMETER Environment
        Name passed to New-VBAFEnvironment (with -MaxSteps 200) when
        -CustomEnvironment is not supplied. Also used as the label in the console
        output, the CSV rows and the returned hashtable.
    .PARAMETER CustomEnvironment
        Pre-built environment object; when supplied it is used instead of creating one.
    .PARAMETER Episodes
        Episodes per agent per run.
    .PARAMETER Runs
        Number of repeated runs per agent.
    .PARAMETER Agents
        Agent names forwarded to Invoke-VBAFSingleAgentBenchmark.
    .PARAMETER ExportCsv
        Path of a CSV file to write; nothing is exported when empty.
    .PARAMETER PrintEvery
        Accepted for interface compatibility; not used by this function.
    .OUTPUTS
        Hashtable with keys Environment, Episodes, Results (one hashtable per agent
        with Agent, AvgMean, AvgImp, BestMean, Runs) and Winner (agent name, or
        $null when no agent was benchmarked).
    #>
    param(
        [string]$Environment       = "CartPole",
        [object]$CustomEnvironment = $null,
        [int]$Episodes             = 50,
        [int]$Runs                 = 1,
        [string[]]$Agents          = @("DQN", "PPO", "A3C"),
        [string]$ExportCsv         = "",
        [int]$PrintEvery           = 10
    )

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host "  VBAF AGENT BENCHMARK" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""
    Write-Host "  Environment : $Environment" -ForegroundColor White
    Write-Host "  Agents      : $($Agents -join ', ')" -ForegroundColor White
    Write-Host "  Episodes    : $Episodes per agent" -ForegroundColor White
    Write-Host "  Runs        : $Runs per agent" -ForegroundColor White
    Write-Host ""

    $benchEnv = if ($CustomEnvironment) {
        $CustomEnvironment
    } else {
        New-VBAFEnvironment -Name $Environment -MaxSteps 200
    }

    Write-Host "  State size  : $($benchEnv.ObservationSpace.Size)" -ForegroundColor DarkGray
    Write-Host "  Action size : $($benchEnv.ActionSpace.Size)" -ForegroundColor DarkGray
    Write-Host ""

    $allResults = [System.Collections.Generic.List[hashtable]]::new()
    # CSV rows are PSCustomObjects built from hashtable literals so the column
    # order in the exported file is stable (plain hashtables have no key order).
    $csvRows    = [System.Collections.Generic.List[object]]::new()
    $agentNum   = 0

    foreach ($agentName in $Agents) {
        $agentNum++

        Write-Host ("-" * 65) -ForegroundColor Yellow
        Write-Host "  Agent $agentNum/$($Agents.Count) : $agentName" -ForegroundColor Yellow
        Write-Host ("-" * 65) -ForegroundColor Yellow

        $runResults = [System.Collections.Generic.List[hashtable]]::new()

        for ($run = 1; $run -le $Runs; $run++) {
            if ($Runs -gt 1) { Write-Host "    Run $run of $Runs..." -ForegroundColor DarkGray }

            $result = Invoke-VBAFSingleAgentBenchmark -AgentName $agentName -Environment $benchEnv -Episodes $Episodes
            $runResults.Add($result)

            Write-Host ("    Mean reward : {0,8:F2}" -f $result.Mean)        -ForegroundColor White
            Write-Host ("    Best reward : {0,8:F2}" -f $result.Best)        -ForegroundColor Green
            Write-Host ("    Worst reward: {0,8:F2}" -f $result.Worst)       -ForegroundColor Red
            Write-Host ("    First 10 avg: {0,8:F2}" -f $result.First10Avg)  -ForegroundColor DarkGray
            Write-Host ("    Last  10 avg: {0,8:F2}" -f $result.Last10Avg)   -ForegroundColor DarkGray

            $impColor = if ($result.Improvement -gt 0) { "Green" } else { "Red" }
            Write-Host ("    Improvement : {0,7:F1}%" -f $result.Improvement) -ForegroundColor $impColor
            Write-Host ("    Time        : {0,7:F1}s" -f $result.TimeSeconds) -ForegroundColor DarkGray
            Write-Host ""

            $csvRows.Add([PSCustomObject]@{
                Agent       = $agentName
                Environment = $Environment
                Run         = $run
                Episodes    = $Episodes
                Mean        = [Math]::Round($result.Mean, 4)
                Best        = [Math]::Round($result.Best, 4)
                Worst       = [Math]::Round($result.Worst, 4)
                First10Avg  = [Math]::Round($result.First10Avg, 4)
                Last10Avg   = [Math]::Round($result.Last10Avg, 4)
                Improvement = [Math]::Round($result.Improvement, 2)
                TimeSeconds = [Math]::Round($result.TimeSeconds, 1)
            })
        }

        # Best single-episode reward over ALL runs of this agent (previously only
        # the first run was consulted, which under-reported multi-run benchmarks).
        $bestOverall = ($runResults | ForEach-Object { $_.Best } | Measure-Object -Maximum).Maximum

        $allResults.Add(@{
            Agent    = $agentName
            AvgMean  = ($runResults | ForEach-Object { $_.Mean } | Measure-Object -Average).Average
            AvgImp   = ($runResults | ForEach-Object { $_.Improvement } | Measure-Object -Average).Average
            BestMean = $bestOverall
            Runs     = $Runs
        })
    }

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host "  BENCHMARK RESULTS -- $Environment -- $Episodes episodes" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""
    Write-Host ("  {0,-12} {1,12} {2,12} {3,12}" -f "Agent", "Avg Reward", "Improvement", "Best Reward") -ForegroundColor Gray
    Write-Host ("  {0,-12} {1,12} {2,12} {3,12}" -f "-----", "----------", "-----------", "-----------") -ForegroundColor DarkGray

    # @() is required: with a single agent the pipeline yields one bare hashtable,
    # and indexing that with [0] is a key lookup that returns $null.
    $ranked = @($allResults | Sort-Object { $_.AvgMean } -Descending)
    $rank   = 1

    foreach ($entry in $ranked) {
        $impColor  = if ($entry.AvgImp -gt 0) { "Green" } else { "Red" }
        $rankColor = if ($rank -eq 1) { "Yellow" } else { "White" }

        Write-Host -NoNewline ("  [{0}] {1,-10}" -f $rank, $entry.Agent) -ForegroundColor $rankColor
        Write-Host -NoNewline ("{0,10:F2}  " -f $entry.AvgMean) -ForegroundColor White
        Write-Host -NoNewline ("{0,10:F1}%  " -f $entry.AvgImp) -ForegroundColor $impColor
        Write-Host ("{0,10:F2}" -f $entry.BestMean) -ForegroundColor Green

        $rank++
    }

    Write-Host ""

    $winner     = $null
    $winnerName = $null
    if ($ranked.Count -gt 0) {
        $winner     = $ranked[0]
        $winnerName = $winner.Agent
        Write-Host ("  Best agent: {0}  Avg: {1:F2}  Improvement: {2:F1}%" -f $winner.Agent, $winner.AvgMean, $winner.AvgImp) -ForegroundColor Yellow
    } else {
        Write-Warning "No agents were benchmarked."
    }

    Write-Host ""

    if ($ExportCsv) {
        try {
            # -ErrorAction Stop turns non-terminating export errors into exceptions
            # so the catch block below actually sees them.
            $csvRows | Export-Csv -Path $ExportCsv -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
            Write-Host "  Results exported to: $ExportCsv" -ForegroundColor Green
        } catch {
            Write-Host "  CSV export failed: $_" -ForegroundColor Red
        }
    }

    return @{ Environment = $Environment; Episodes = $Episodes; Results = $allResults; Winner = $winnerName }
}

function Invoke-VBAFQuickBenchmark {
    <#
    .SYNOPSIS
        Compares one trained agent against a random-action baseline.
    .DESCRIPTION
        Phase 1 runs 10 episodes of uniformly random actions (at most 200 steps
        each) on a fresh environment and averages the episode rewards.
        Phase 2 trains -AgentName on the same environment object through
        Invoke-VBAFSingleAgentBenchmark and reports its mean reward against the
        baseline, plus the agent's own first-10 vs last-10 improvement.
    .PARAMETER AgentName
        Agent name forwarded to Invoke-VBAFSingleAgentBenchmark.
    .PARAMETER Environment
        Environment name passed to New-VBAFEnvironment (with -MaxSteps 200).
    .PARAMETER Episodes
        Number of training episodes for the agent.
    .OUTPUTS
        Hashtable with keys Agent, Baseline (random average), Trained (agent mean),
        Improvement (percent change of Trained relative to the absolute Baseline,
        0 when Baseline is 0) and Result (the full single-agent result).
    #>
    param(
        [string]$AgentName   = "DQN",
        [string]$Environment = "CartPole",
        [int]$Episodes       = 50
    )

    $baselineEpisodes = 10
    $baselineMaxSteps = 200

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host "  VBAF QUICK BENCHMARK: $AgentName vs Random on $Environment" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""

    $benchEnv = New-VBAFEnvironment -Name $Environment -MaxSteps 200

    Write-Host "  Phase 1: Random agent baseline..." -ForegroundColor Gray

    $baseRewards = [System.Collections.Generic.List[double]]::new()

    for ($ep = 1; $ep -le $baselineEpisodes; $ep++) {
        # The initial observation is not needed: actions are chosen at random.
        $null = $benchEnv.Reset()

        $epReward = 0.0
        $stepDone = $false
        $step     = 0

        while (-not $stepDone -and $step -lt $baselineMaxSteps) {
            # Get-Random's -Maximum is exclusive, so this picks 0..Size-1.
            $randomAction = Get-Random -Minimum 0 -Maximum $benchEnv.ActionSpace.Size
            $sr           = $benchEnv.Step($randomAction)

            $epReward += $sr.Reward
            $stepDone  = $sr.Done
            $step++
        }

        $baseRewards.Add($epReward)
    }

    $baseAvg = ($baseRewards | Measure-Object -Average).Average

    Write-Host ("  Baseline avg reward ({0} episodes): {1:F2}" -f $baselineEpisodes, $baseAvg) -ForegroundColor Gray
    Write-Host ""
    Write-Host "  Phase 2: Training $AgentName for $Episodes episodes..." -ForegroundColor Yellow

    $result = Invoke-VBAFSingleAgentBenchmark -AgentName $AgentName -Environment $benchEnv -Episodes $Episodes -Silent

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host "  QUICK BENCHMARK RESULTS" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""

    Write-Host ("  Random baseline avg  : {0,8:F2}" -f $baseAvg) -ForegroundColor Gray
    # The agent name is a format argument rather than interpolated into the format
    # string, so a name containing braces cannot break the -f operator.
    Write-Host ("  {0} trained avg    : {1,8:F2}" -f $AgentName, $result.Mean) -ForegroundColor White

    $totalImp = if ($baseAvg -ne 0) {
        ($result.Mean - $baseAvg) / [Math]::Abs($baseAvg) * 100
    } else {
        0.0
    }

    # Each improvement line is colored by the sign of its own value.
    $totalColor = if ($totalImp -gt 0) { "Green" } else { "Red" }
    $learnColor = if ($result.Improvement -gt 0) { "Green" } else { "Red" }

    Write-Host ("  Improvement vs random: {0,7:F1}%" -f $totalImp)           -ForegroundColor $totalColor
    Write-Host ("  Learning improvement : {0,7:F1}%" -f $result.Improvement) -ForegroundColor $learnColor
    Write-Host ""

    if ($totalImp -gt 10) {
        Write-Host "  $AgentName successfully outperformed random baseline." -ForegroundColor Green
    } elseif ($totalImp -gt 0) {
        Write-Host "  $AgentName slightly better than random. Try more episodes." -ForegroundColor Yellow
    } else {
        Write-Host "  $AgentName did not outperform random. Try more episodes." -ForegroundColor Red
    }

    Write-Host ""

    return @{ Agent = $AgentName; Baseline = $baseAvg; Trained = $result.Mean; Improvement = $totalImp; Result = $result }
}