# VBAF.Benchmark.ps1

#Requires -Version 5.1

<#
.SYNOPSIS
    VBAF Benchmark Module -- Invoke-VBAFAgentBenchmark
.DESCRIPTION
    Compare multiple RL agents side by side on the same environment.
    Outputs a formatted comparison table and optional CSV export.
.NOTES
    Part of VBAF (Visual AI & Reinforcement Learning Framework)
    Phase 6 -- benchmark module.
    ASCII only -- no Unicode, no emoji, no box-drawing characters.
    Requires: VBAF.LoadAll.ps1
#>



function Invoke-VBAFSingleAgentBenchmark {
    <#
    .SYNOPSIS
        Trains one agent for a number of episodes and summarises its episode rewards.
    .DESCRIPTION
        Dispatches on -AgentName (the switch statement matches case-insensitively):
          DQN       - trained in-line against -Environment, at most 500 steps per episode.
          QLearning - trained in-line against -Environment, at most 200 steps per episode;
                      states are turned into string keys by rounding each component to
                      one decimal and joining with "|".
          PPO / A3C - delegated to Invoke-PPOTraining / Invoke-A3CTraining; the rewards
                      are read from the EpisodeRewards property of the last object
                      those commands return.
        Any other name writes a warning and yields an all-zero summary.
    .PARAMETER AgentName
        Name of the agent to benchmark.
    .PARAMETER Environment
        Environment object. The in-line loops use ObservationSpace.Size,
        ActionSpace.Size, Reset() and Step(action); Step results are read through
        their NextState, Reward and Done members.
    .PARAMETER Episodes
        Number of training episodes.
    .PARAMETER Silent
        Suppresses the "Running ..." progress line.
    .OUTPUTS
        Hashtable with keys Agent, Episodes, Rewards, Mean, Best, Worst, First10Avg,
        Last10Avg, Improvement (percent change from First10Avg to Last10Avg, 0 when
        First10Avg is 0) and TimeSeconds.
    #>
    param(
        [string]$AgentName,
        [object]$Environment,
        [int]$Episodes,
        [switch]$Silent
    )

    $rewards   = [System.Collections.Generic.List[double]]::new()
    $startTime = Get-Date

    if (-not $Silent) {
        Write-Host " Running $AgentName for $Episodes episodes..." -ForegroundColor DarkGray
    }

    switch ($AgentName) {
        "DQN" {
            $config              = [DQNConfig]::new()
            $config.StateSize    = $Environment.ObservationSpace.Size
            $config.ActionSize   = $Environment.ActionSpace.Size
            $config.EpsilonDecay = 0.9995
            $config.EpsilonMin   = 0.05
            # Layer sizes handed to NeuralNetwork: input, 24, 24, output.
            [int[]] $arch = @($Environment.ObservationSpace.Size, 24, 24, $Environment.ActionSpace.Size)
            $main   = [NeuralNetwork]::new($arch, $config.LearningRate)
            $target = [NeuralNetwork]::new($arch, $config.LearningRate)
            $memory = [ExperienceReplay]::new($config.MemorySize)
            $agent  = [DQNAgent]::new($config, $main, $target, $memory)

            for ($ep = 1; $ep -le $Episodes; $ep++) {
                [double[]] $state = $Environment.Reset()
                $epReward = 0.0
                $step     = 0
                $stepDone = $false

                while (-not $stepDone -and $step -lt 500) {
                    $action          = $agent.Act($state)
                    $sr              = $Environment.Step($action)
                    [double[]] $next = $sr.NextState
                    # Discard any method return values so they cannot leak into
                    # this function's output and corrupt the returned hashtable.
                    $null = $agent.Remember($state, $action, $sr.Reward, $next, $sr.Done)
                    # Replay is invoked on every 4th step only.
                    if ($step % 4 -eq 0) { $null = $agent.Replay() }
                    $stepDone  = $sr.Done
                    $state     = $next
                    $epReward += $sr.Reward
                    $step++
                }
                $null = $agent.EndEpisode($epReward)
                $rewards.Add($epReward)
            }
        }

        "PPO" {
            # NOTE(review): the trainer is called without -Environment, so the
            # environment passed to this function is not forwarded -- confirm
            # that Invoke-PPOTraining uses the intended environment.
            # @() keeps indexing safe when the trainer returns a single object.
            $results = @(Invoke-PPOTraining -Episodes $Episodes -PrintEvery ($Episodes + 1) -FastMode)
            if ($results.Count -gt 0 -and $null -ne $results[-1] -and $results[-1].PSObject.Properties['EpisodeRewards']) {
                foreach ($r in $results[-1].EpisodeRewards) { $rewards.Add([double]$r) }
            }
        }

        "A3C" {
            # NOTE(review): same as PPO -- $Environment is not forwarded; confirm.
            $results = @(Invoke-A3CTraining -Episodes $Episodes -PrintEvery ($Episodes + 1) -FastMode)
            if ($results.Count -gt 0 -and $null -ne $results[-1] -and $results[-1].PSObject.Properties['EpisodeRewards']) {
                foreach ($r in $results[-1].EpisodeRewards) { $rewards.Add([double]$r) }
            }
        }

        "QLearning" {
            # Actions are named "0", "1", ... so they can be cast back to int.
            $actionNames = @(0..($Environment.ActionSpace.Size - 1) | ForEach-Object { "$_" })
            $agent       = [QLearningAgent]::new($actionNames)

            for ($ep = 1; $ep -le $Episodes; $ep++) {
                [double[]] $stateArr = $Environment.Reset()
                $stateStr = ($stateArr | ForEach-Object { [Math]::Round($_, 1) }) -join "|"
                $epReward = 0.0
                $step     = 0
                $stepDone = $false

                while (-not $stepDone -and $step -lt 200) {
                    $action          = [int]$agent.ChooseAction($stateStr)
                    $sr              = $Environment.Step($action)
                    [double[]] $nextArr = $sr.NextState
                    $nextStr         = ($nextArr | ForEach-Object { [Math]::Round($_, 1) }) -join "|"
                    $null = $agent.Learn($stateStr, "$action", $sr.Reward, $nextStr)
                    $stateStr  = $nextStr
                    $epReward += $sr.Reward
                    $stepDone  = $sr.Done
                    $step++
                }
                $null = $agent.EndEpisode($epReward)
                $rewards.Add($epReward)
            }
        }

        default {
            # Make a misspelled agent name visible instead of silently
            # reporting an all-zero benchmark.
            Write-Warning "Invoke-VBAFSingleAgentBenchmark: unknown agent '$AgentName'; no episodes were run."
        }
    }

    $elapsed = (Get-Date) - $startTime

    # No rewards collected (unknown agent, zero episodes, or a trainer that
    # returned nothing usable): report zeros rather than failing on empty input.
    if ($rewards.Count -eq 0) {
        return @{
            Agent = $AgentName; Episodes = $Episodes; Rewards = @()
            Mean = 0.0; Best = 0.0; Worst = 0.0
            First10Avg = 0.0; Last10Avg = 0.0; Improvement = 0.0
            TimeSeconds = $elapsed.TotalSeconds
        }
    }

    $rewardArr   = $rewards.ToArray()
    $mean        = ($rewardArr | Measure-Object -Average).Average
    $best        = ($rewardArr | Measure-Object -Maximum).Maximum
    $worst       = ($rewardArr | Measure-Object -Minimum).Minimum
    # The first/last windows shrink to the available episodes when fewer than 10 ran.
    $first10     = $rewardArr[0..([Math]::Min(9, $rewardArr.Count - 1))]
    $last10start = [Math]::Max(0, $rewardArr.Count - 10)
    $last10      = $rewardArr[$last10start..($rewardArr.Count - 1)]
    $first10Avg  = ($first10 | Measure-Object -Average).Average
    $last10Avg   = ($last10  | Measure-Object -Average).Average
    # Relative change in percent; guarded against division by zero.
    $improvement = if ($first10Avg -ne 0) { ($last10Avg - $first10Avg) / [Math]::Abs($first10Avg) * 100 } else { 0.0 }

    return @{
        Agent       = $AgentName
        Episodes    = $Episodes
        Rewards     = $rewardArr
        Mean        = $mean
        Best        = $best
        Worst       = $worst
        First10Avg  = $first10Avg
        Last10Avg   = $last10Avg
        Improvement = $improvement
        TimeSeconds = $elapsed.TotalSeconds
    }
}


function Invoke-VBAFAgentBenchmark {
    <#
    .SYNOPSIS
        Benchmarks several agents on one environment and prints a ranked comparison.
    .DESCRIPTION
        For every name in -Agents, calls Invoke-VBAFSingleAgentBenchmark -Runs times,
        prints the per-run statistics, then prints a table ranked by average mean
        reward (descending). Optionally writes one CSV row per agent and run.
    .PARAMETER Environment
        Environment name passed to New-VBAFEnvironment (with -MaxSteps 200) when
        -CustomEnvironment is not supplied. Also used as the label in the output
        and in the CSV rows.
    .PARAMETER CustomEnvironment
        Pre-built environment object used instead of creating one by name.
    .PARAMETER Episodes
        Episodes per agent and run.
    .PARAMETER Runs
        Number of repeated runs per agent.
    .PARAMETER Agents
        Agent names to benchmark.
    .PARAMETER ExportCsv
        Path of a CSV file to write; nothing is exported when empty.
    .PARAMETER PrintEvery
        Not used by this function; kept so existing callers continue to work.
    .OUTPUTS
        Hashtable with keys Environment, Episodes, Results (one hashtable per agent
        with Agent, AvgMean, AvgImp, BestMean, Runs) and Winner (agent name, or
        $null when no agent was benchmarked).
    #>
    param(
        [string]$Environment       = "CartPole",
        [object]$CustomEnvironment = $null,
        [int]$Episodes             = 50,
        [int]$Runs                 = 1,
        [string[]]$Agents          = @("DQN", "PPO", "A3C"),
        [string]$ExportCsv         = "",
        [int]$PrintEvery           = 10
    )

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host " VBAF AGENT BENCHMARK" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""
    Write-Host " Environment : $Environment" -ForegroundColor White
    Write-Host " Agents : $($Agents -join ', ')" -ForegroundColor White
    Write-Host " Episodes : $Episodes per agent" -ForegroundColor White
    Write-Host " Runs : $Runs per agent" -ForegroundColor White
    Write-Host ""

    $benchEnv = if ($CustomEnvironment) { $CustomEnvironment } else { New-VBAFEnvironment -Name $Environment -MaxSteps 200 }
    Write-Host " State size : $($benchEnv.ObservationSpace.Size)" -ForegroundColor DarkGray
    Write-Host " Action size : $($benchEnv.ActionSpace.Size)" -ForegroundColor DarkGray
    Write-Host ""

    $allResults = [System.Collections.Generic.List[hashtable]]::new()
    $csvRows    = [System.Collections.Generic.List[object]]::new()
    $agentNum   = 0

    foreach ($agentName in $Agents) {
        $agentNum++
        Write-Host ("-" * 65) -ForegroundColor Yellow
        Write-Host " Agent $agentNum/$($Agents.Count) : $agentName" -ForegroundColor Yellow
        Write-Host ("-" * 65) -ForegroundColor Yellow

        $runResults = [System.Collections.Generic.List[hashtable]]::new()

        for ($run = 1; $run -le $Runs; $run++) {
            if ($Runs -gt 1) { Write-Host " Run $run of $Runs..." -ForegroundColor DarkGray }

            $result = Invoke-VBAFSingleAgentBenchmark -AgentName $agentName -Environment $benchEnv -Episodes $Episodes

            $runResults.Add($result)

            Write-Host (" Mean reward : {0,8:F2}" -f $result.Mean)        -ForegroundColor White
            Write-Host (" Best reward : {0,8:F2}" -f $result.Best)        -ForegroundColor Green
            Write-Host (" Worst reward: {0,8:F2}" -f $result.Worst)       -ForegroundColor Red
            Write-Host (" First 10 avg: {0,8:F2}" -f $result.First10Avg)  -ForegroundColor DarkGray
            Write-Host (" Last 10 avg: {0,8:F2}" -f $result.Last10Avg)   -ForegroundColor DarkGray
            $impColor = if ($result.Improvement -gt 0) { "Green" } else { "Red" }
            Write-Host (" Improvement : {0,7:F1}%" -f $result.Improvement) -ForegroundColor $impColor
            Write-Host (" Time : {0,7:F1}s" -f $result.TimeSeconds) -ForegroundColor DarkGray
            Write-Host ""

            # An ordered dictionary keeps the CSV column order stable; a plain
            # hashtable would emit the columns in arbitrary order.
            $csvRows.Add([PSCustomObject][ordered]@{
                Agent       = $agentName
                Environment = $Environment
                Run         = $run
                Episodes    = $Episodes
                Mean        = [Math]::Round($result.Mean, 4)
                Best        = [Math]::Round($result.Best, 4)
                Worst       = [Math]::Round($result.Worst, 4)
                First10Avg  = [Math]::Round($result.First10Avg, 4)
                Last10Avg   = [Math]::Round($result.Last10Avg, 4)
                Improvement = [Math]::Round($result.Improvement, 2)
                TimeSeconds = [Math]::Round($result.TimeSeconds, 1)
            })
        }

        $allResults.Add(@{
            Agent    = $agentName
            AvgMean  = ($runResults | ForEach-Object { $_.Mean } | Measure-Object -Average).Average
            AvgImp   = ($runResults | ForEach-Object { $_.Improvement } | Measure-Object -Average).Average
            # Best episode reward over ALL runs, not just the first run.
            BestMean = ($runResults | ForEach-Object { $_.Best } | Measure-Object -Maximum).Maximum
            Runs     = $Runs
        })
    }

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host " BENCHMARK RESULTS -- $Environment -- $Episodes episodes" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""
    Write-Host (" {0,-12} {1,12} {2,12} {3,12}" -f "Agent", "Avg Reward", "Improvement", "Best Reward") -ForegroundColor Gray
    Write-Host (" {0,-12} {1,12} {2,12} {3,12}" -f "-----", "----------", "-----------", "-----------") -ForegroundColor DarkGray

    # @() is required: with a single agent Sort-Object emits one bare hashtable,
    # and indexing a hashtable with [0] is a key lookup that returns $null.
    $ranked = @($allResults | Sort-Object { $_.AvgMean } -Descending)
    $rank   = 1
    foreach ($entry in $ranked) {
        $impColor  = if ($entry.AvgImp -gt 0) { "Green" } else { "Red" }
        $rankColor = if ($rank -eq 1) { "Yellow" } else { "White" }
        Write-Host -NoNewline (" [{0}] {1,-10}" -f $rank, $entry.Agent) -ForegroundColor $rankColor
        Write-Host -NoNewline ("{0,10:F2} " -f $entry.AvgMean) -ForegroundColor White
        Write-Host -NoNewline ("{0,10:F1}% " -f $entry.AvgImp) -ForegroundColor $impColor
        Write-Host ("{0,10:F2}" -f $entry.BestMean) -ForegroundColor Green
        $rank++
    }

    Write-Host ""
    $winner     = $null
    $winnerName = $null
    if ($ranked.Count -gt 0) {
        $winner     = $ranked[0]
        $winnerName = $winner.Agent
        # The format operator tolerates a null average (e.g. -Runs 0), where
        # calling .ToString() on it would fail.
        Write-Host (" Best agent: {0} Avg: {1:F2} Improvement: {2:F1}%" -f $winner.Agent, $winner.AvgMean, $winner.AvgImp) -ForegroundColor Yellow
    }
    Write-Host ""

    if ($ExportCsv) {
        try {
            $csvRows | Export-Csv -Path $ExportCsv -NoTypeInformation -Encoding UTF8
            Write-Host " Results exported to: $ExportCsv" -ForegroundColor Green
        } catch {
            # Export is best-effort: report the failure but still return results.
            Write-Host " CSV export failed: $_" -ForegroundColor Red
        }
    }

    return @{ Environment = $Environment; Episodes = $Episodes; Results = $allResults; Winner = $winnerName }
}


function Invoke-VBAFQuickBenchmark {
    <#
    .SYNOPSIS
        Compares one trained agent against a random-action baseline.
    .DESCRIPTION
        Phase 1 plays 10 episodes (at most 200 steps each) with actions drawn by
        Get-Random and averages the episode rewards. Phase 2 trains -AgentName via
        Invoke-VBAFSingleAgentBenchmark on the same environment object. The
        relative difference between the trained mean and the baseline is printed
        along with a verdict line.
    .PARAMETER AgentName
        Agent to train; forwarded to Invoke-VBAFSingleAgentBenchmark.
    .PARAMETER Environment
        Environment name passed to New-VBAFEnvironment (with -MaxSteps 200).
    .PARAMETER Episodes
        Number of training episodes for the agent.
    .OUTPUTS
        Hashtable with keys Agent, Baseline, Trained, Improvement (percent vs the
        random baseline, 0 when the baseline is 0) and Result (the full hashtable
        returned by Invoke-VBAFSingleAgentBenchmark).
    #>
    param(
        [string]$AgentName   = "DQN",
        [string]$Environment = "CartPole",
        [int]$Episodes       = 50
    )

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host " VBAF QUICK BENCHMARK: $AgentName vs Random on $Environment" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""

    $benchEnv = New-VBAFEnvironment -Name $Environment -MaxSteps 200

    Write-Host " Phase 1: Random agent baseline..." -ForegroundColor Gray
    $baseRewards = [System.Collections.Generic.List[double]]::new()
    for ($ep = 1; $ep -le 10; $ep++) {
        # The initial observation is not needed for random play; only the reset is.
        $null = $benchEnv.Reset()
        $epReward = 0.0
        $stepDone = $false
        $step     = 0
        while (-not $stepDone -and $step -lt 200) {
            # Get-Random's -Maximum is exclusive, so actions are 0..Size-1.
            $sr        = $benchEnv.Step((Get-Random -Minimum 0 -Maximum $benchEnv.ActionSpace.Size))
            $epReward += $sr.Reward
            $stepDone  = $sr.Done
            $step++
        }
        $baseRewards.Add($epReward)
    }
    $baseAvg = ($baseRewards | Measure-Object -Average).Average
    Write-Host (" Baseline avg reward (10 episodes): {0:F2}" -f $baseAvg) -ForegroundColor Gray
    Write-Host ""

    Write-Host " Phase 2: Training $AgentName for $Episodes episodes..." -ForegroundColor Yellow
    $result = Invoke-VBAFSingleAgentBenchmark -AgentName $AgentName -Environment $benchEnv -Episodes $Episodes -Silent

    Write-Host ""
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host " QUICK BENCHMARK RESULTS" -ForegroundColor Cyan
    Write-Host ("=" * 65) -ForegroundColor Cyan
    Write-Host ""
    Write-Host (" Random baseline avg : {0,8:F2}" -f $baseAvg)       -ForegroundColor Gray
    Write-Host (" $AgentName trained avg : {0,8:F2}" -f $result.Mean) -ForegroundColor White

    # Percent change relative to the baseline; guarded against division by zero.
    $totalImp   = if ($baseAvg -ne 0) { ($result.Mean - $baseAvg) / [Math]::Abs($baseAvg) * 100 } else { 0.0 }
    $impColor   = if ($totalImp -gt 0) { "Green" } else { "Red" }
    # Each line is coloured by the sign of the value it shows.
    $learnColor = if ($result.Improvement -gt 0) { "Green" } else { "Red" }
    Write-Host (" Improvement vs random: {0,7:F1}%" -f $totalImp)          -ForegroundColor $impColor
    Write-Host (" Learning improvement : {0,7:F1}%" -f $result.Improvement) -ForegroundColor $learnColor
    Write-Host ""

    if ($totalImp -gt 10) {
        Write-Host " $AgentName successfully outperformed random baseline." -ForegroundColor Green
    } elseif ($totalImp -gt 0) {
        Write-Host " $AgentName slightly better than random. Try more episodes." -ForegroundColor Yellow
    } else {
        Write-Host " $AgentName did not outperform random. Try more episodes." -ForegroundColor Red
    }

    Write-Host ""
    return @{ Agent = $AgentName; Baseline = $baseAvg; Trained = $result.Mean; Improvement = $totalImp; Result = $result }
}