# VBAF.RL.DQN.ps1

#Requires -Version 5.1
<#
.SYNOPSIS
    Deep Q-Network (DQN) Agent for Reinforcement Learning
.DESCRIPTION
    Implements the DQN algorithm combining:
      - Neural Network for Q-value approximation
      - Experience Replay for stable training
      - Target Network for stable Bellman targets
    Requires VBAF.Core.AllClasses.ps1 and VBAF.RL.ExperienceReplay.ps1
    to be loaded first (via VBAF.LoadAll.ps1).
.NOTES
    Part of VBAF - Phase 3 Reinforcement Learning Module
    PS 5.1 compatible - dependency injection pattern used
    to avoid parse-time type resolution errors in classes.
#>

# Set base path
# NOTE(review): $basePath is not referenced anywhere else in this file —
# presumably kept for dot-sourced consumers; confirm before removing.
$basePath = $PSScriptRoot

# Hyperparameter container for DQNAgent. All values have sensible
# CartPole-style defaults and are overridden by Invoke-DQNTraining.
class DQNConfig {
    [int]    $StateSize        = 4       # observation vector length
    [int]    $ActionSize       = 2       # number of discrete actions
    [int[]]  $HiddenLayers     = @(64, 64)  # hidden layer widths, in order
    [double] $LearningRate     = 0.001   # network learning rate
    [double] $Gamma            = 0.95    # discount factor for future rewards
    [double] $Epsilon          = 1.0     # initial exploration probability
    [double] $EpsilonMin       = 0.01    # floor for epsilon decay
    [double] $EpsilonDecay     = 0.995   # multiplicative decay per Replay()
    [int]    $BatchSize        = 32      # samples drawn per Replay()
    [int]    $MemorySize       = 10000   # replay buffer capacity
    [int]    $TargetUpdateFreq = 10      # episodes between target-net syncs
    [string] $Activation       = "relu"  # passed through to network (if used)
}

# ============================================================
class DQNAgent {
    # Deep Q-Network agent: epsilon-greedy policy over a Q-network,
    # trained from a replay buffer against a periodically-synced target
    # network (standard DQN, Mnih et al. 2015).
    #
    # [object] used for all cross-file types - PS 5.1 requirement
    [object] $MainNetwork      # online Q-network (trained in Replay)
    [object] $TargetNetwork    # frozen copy providing stable Bellman targets
    [object] $Memory           # replay buffer; must expose Add/Size/Sample
    [object] $Config           # DQNConfig (or compatible) settings object

    [int]    $ActionSize
    [double] $Epsilon          # current exploration probability
    [int]    $TotalSteps      = 0   # environment transitions stored
    [int]    $TotalEpisodes   = 0
    [int]    $TrainingSteps   = 0   # individual TrainSample calls
    [double] $LastLoss        = 0.0 # avg squared TD error of last Replay()

    [System.Collections.Generic.List[double]] $EpisodeRewards
    [System.Collections.Generic.List[double]] $LossHistory

    hidden [System.Random] $Rng

    # -------------------------------------------------------
    # Constructor receives pre-built network objects (injected)
    # so this class never needs to reference external types.
    # Target network is immediately synced to the main network.
    # -------------------------------------------------------
    DQNAgent([object]$config, [object]$mainNetwork, [object]$targetNetwork, [object]$memory) {
        $this.Config        = $config
        $this.MainNetwork   = $mainNetwork
        $this.TargetNetwork = $targetNetwork
        $this.Memory        = $memory
        $this.ActionSize    = $config.ActionSize
        $this.Epsilon       = $config.Epsilon
        $this.Rng           = [System.Random]::new()

        $this.EpisodeRewards = [System.Collections.Generic.List[double]]::new()
        $this.LossHistory    = [System.Collections.Generic.List[double]]::new()

        # Sync target = main weights at start
        $this.SyncTargetNetwork()

        Write-Host "✅ DQNAgent created" -ForegroundColor Green
        Write-Host " State size : $($config.StateSize)"           -ForegroundColor Cyan
        Write-Host " Action size : $($config.ActionSize)"          -ForegroundColor Cyan
        Write-Host " Hidden : $($config.HiddenLayers -join ' -> ')" -ForegroundColor Cyan
        Write-Host " Memory : $($config.MemorySize)"          -ForegroundColor Cyan
        Write-Host " Batch size : $($config.BatchSize)"           -ForegroundColor Cyan
    }

    # -------------------------------------------------------
    # Store one transition (s, a, r, s', done) in replay memory.
    # -------------------------------------------------------
    [void] Remember([double[]]$state, [int]$action, [double]$reward,
                    [double[]]$nextState, [bool]$done) {
        $exp = @{
            State     = $state
            Action    = $action
            Reward    = $reward
            NextState = $nextState
            Done      = $done
        }
        $this.Memory.Add($exp)
        $this.TotalSteps++
    }

    # -------------------------------------------------------
    # Epsilon-greedy action selection: explore with probability
    # Epsilon, otherwise pick the argmax of predicted Q-values.
    # -------------------------------------------------------
    [int] Act([double[]]$state) {
        if ($this.Rng.NextDouble() -le $this.Epsilon) {
            return $this.Rng.Next(0, $this.ActionSize)
        }
        $qValues = $this.MainNetwork.Predict($state)
        return [DQNAgent]::ArgMax($qValues)
    }

    # -------------------------------------------------------
    # Greedy action for evaluation (no exploration)
    # -------------------------------------------------------
    [int] Predict([double[]]$state) {
        $qValues = $this.MainNetwork.Predict($state)
        return [DQNAgent]::ArgMax($qValues)
    }

    # -------------------------------------------------------
    # Raw Q-values for a state, one per action.
    # -------------------------------------------------------
    [double[]] GetQValues([double[]]$state) {
        return $this.MainNetwork.Predict($state)
    }

    # -------------------------------------------------------
    # Sample a batch from memory and train the main network.
    # Returns the batch-average squared TD error (0.0 if the
    # buffer does not yet hold a full batch).
    # -------------------------------------------------------
    [double] Replay() {
        if ($this.Memory.Size() -lt $this.Config.BatchSize) {
            return 0.0
        }

        $batch     = $this.Memory.Sample($this.Config.BatchSize)
        $totalLoss = 0.0

        foreach ($exp in $batch) {
            $state     = $exp.State
            $action    = $exp.Action
            $reward    = $exp.Reward
            $nextState = $exp.NextState
            $done      = $exp.Done

            # Start the target vector from the current prediction so that
            # only the taken action's entry carries a learning signal.
            $target  = $this.MainNetwork.Predict($state)
            $qBefore = $target[$action]   # pre-update Q(s,a), kept for the loss

            if ($done) {
                $target[$action] = $reward
            } else {
                # Bellman target uses the TARGET network for stability.
                $nextQ           = $this.TargetNetwork.Predict($nextState)
                $maxNextQ        = ($nextQ | Measure-Object -Maximum).Maximum
                $target[$action] = $reward + $this.Config.Gamma * $maxNextQ
            }

            $this.MainNetwork.TrainSample($state, $target)
            $this.TrainingSteps++

            # FIX: report the squared TD error measured BEFORE the weight
            # update. The previous code re-ran Predict() after training,
            # which understated the loss (post-update residual) and cost
            # an extra forward pass per sample.
            $diff      = $qBefore - $target[$action]
            $totalLoss += $diff * $diff
        }

        # Decay epsilon toward EpsilonMin (clamped, never below the floor).
        if ($this.Epsilon -gt $this.Config.EpsilonMin) {
            $this.Epsilon *= $this.Config.EpsilonDecay
            if ($this.Epsilon -lt $this.Config.EpsilonMin) {
                $this.Epsilon = $this.Config.EpsilonMin
            }
        }

        $avgLoss       = $totalLoss / $this.Config.BatchSize
        $this.LastLoss = $avgLoss
        $this.LossHistory.Add($avgLoss)
        return $avgLoss
    }

    # -------------------------------------------------------
    # Copy MainNetwork weights to TargetNetwork
    # -------------------------------------------------------
    [void] SyncTargetNetwork() {
        $state = $this.MainNetwork.ExportState()
        $this.TargetNetwork.ImportState($state)
    }

    # -------------------------------------------------------
    # Record an episode's total reward and sync the target
    # network every Config.TargetUpdateFreq episodes.
    # -------------------------------------------------------
    [void] EndEpisode([double]$totalReward) {
        $this.TotalEpisodes++
        $this.EpisodeRewards.Add($totalReward)

        if ($this.TotalEpisodes % $this.Config.TargetUpdateFreq -eq 0) {
            $this.SyncTargetNetwork()
            Write-Host " 🔄 Target network synced (Episode $($this.TotalEpisodes))" -ForegroundColor DarkYellow
        }
    }

    # -------------------------------------------------------
    # Snapshot of training statistics. Averages cover the most
    # recent 100 episodes / replay calls.
    # -------------------------------------------------------
    [hashtable] GetStats() {
        $avgReward = 0.0
        $avgLoss   = 0.0

        if ($this.EpisodeRewards.Count -gt 0) {
            $slice     = $this.EpisodeRewards | Select-Object -Last 100
            $avgReward = ($slice | Measure-Object -Average).Average
        }
        if ($this.LossHistory.Count -gt 0) {
            $slice   = $this.LossHistory | Select-Object -Last 100
            $avgLoss = ($slice | Measure-Object -Average).Average
        }

        return @{
            TotalEpisodes   = $this.TotalEpisodes
            TotalSteps      = $this.TotalSteps
            TrainingSteps   = $this.TrainingSteps
            MemorySize      = $this.Memory.Size()
            Epsilon         = [Math]::Round($this.Epsilon, 4)
            LastLoss        = [Math]::Round($this.LastLoss, 6)
            AvgReward100    = [Math]::Round($avgReward, 3)
            AvgLoss100      = [Math]::Round($avgLoss, 6)
            TargetSyncEvery = $this.Config.TargetUpdateFreq
        }
    }

    # -------------------------------------------------------
    # Pretty-print GetStats() as a console box.
    # -------------------------------------------------------
    [void] PrintStats() {
        $s = $this.GetStats()
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ DQN Agent Statistics ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Episodes : {0,-20}║" -f $s.TotalEpisodes)  -ForegroundColor White
        Write-Host ("║ Total Steps : {0,-20}║" -f $s.TotalSteps)     -ForegroundColor White
        Write-Host ("║ Train Steps : {0,-20}║" -f $s.TrainingSteps)  -ForegroundColor White
        Write-Host ("║ Memory Used : {0,-20}║" -f $s.MemorySize)     -ForegroundColor White
        Write-Host ("║ Epsilon : {0,-20}║" -f $s.Epsilon)        -ForegroundColor Yellow
        Write-Host ("║ Last Loss : {0,-20}║" -f $s.LastLoss)       -ForegroundColor Magenta
        Write-Host ("║ Avg Reward : {0,-20}║" -f $s.AvgReward100)  -ForegroundColor Green
        Write-Host ("║ Avg Loss : {0,-20}║" -f $s.AvgLoss100)    -ForegroundColor Magenta
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }

    # -------------------------------------------------------
    # Index of the maximum element (first winner on ties).
    # -------------------------------------------------------
    static [int] ArgMax([double[]]$arr) {
        $best = 0
        for ($i = 1; $i -lt $arr.Length; $i++) {
            if ($arr[$i] -gt $arr[$best]) { $best = $i }
        }
        return $best
    }
}

# ============================================================
# Simple CartPole-style test environment (no external deps)
# ============================================================
# Minimal CartPole-style environment: a cart on a track balancing a
# pole, integrated with explicit Euler at dt = 0.02s. No external deps.
class DQNEnvironment {
    [double] $Position
    [double] $Velocity
    [double] $Angle
    [double] $AngularVelocity
    [int]    $Steps
    [int]    $MaxSteps
    hidden [System.Random] $Rng

    DQNEnvironment() {
        $this.MaxSteps = 200
        $this.Rng      = [System.Random]::new()
        $this.Reset()
    }

    # Uniform noise in [-0.05, 0.05) for initial-state jitter.
    hidden [double] SmallNoise() {
        return ($this.Rng.NextDouble() - 0.5) * 0.1
    }

    # Re-randomize the state near equilibrium and return it.
    [double[]] Reset() {
        $this.Position        = $this.SmallNoise()
        $this.Velocity        = $this.SmallNoise()
        $this.Angle           = $this.SmallNoise()
        $this.AngularVelocity = $this.SmallNoise()
        $this.Steps           = 0
        return $this.GetState()
    }

    # 4-element observation: [position, velocity, angle, angular velocity].
    [double[]] GetState() {
        return @($this.Position, $this.Velocity, $this.Angle, $this.AngularVelocity)
    }

    # Advance one tick. Action 1 pushes right (+1N), action 0 pushes
    # left (-1N). Returns @{ NextState; Reward; Done }.
    [hashtable] Step([int]$action) {
        $this.Steps++

        # Physical constants (CartPole-v0-style values)
        $g         = 9.8
        $massCart  = 1.0
        $massPole  = 0.1
        $massTotal = $massCart + $massPole
        $poleHalf  = 0.25
        $dt        = 0.02

        $push = -1.0
        if ($action -eq 1) { $push = 1.0 }

        # Equations of motion for the cart-pole system
        $c = [Math]::Cos($this.Angle)
        $s = [Math]::Sin($this.Angle)
        $shared   = ($push + $massPole * $poleHalf * $this.AngularVelocity * $this.AngularVelocity * $s) / $massTotal
        $angAccel = ($g * $s - $c * $shared) / ($poleHalf * (4.0/3.0 - $massPole * $c * $c / $massTotal))
        $linAccel = $shared - $massPole * $poleHalf * $angAccel * $c / $massTotal

        # Euler integration
        $this.Position        += $dt * $this.Velocity
        $this.Velocity        += $dt * $linAccel
        $this.Angle           += $dt * $this.AngularVelocity
        $this.AngularVelocity += $dt * $angAccel

        # Episode ends at the step cap, off the track (|x| > 2.4), or
        # pole past ~12 degrees (|angle| > 0.21 rad).
        $outOfBounds = [Math]::Abs($this.Position) -gt 2.4
        $poleFell    = [Math]::Abs($this.Angle)    -gt 0.21
        $done        = ($this.Steps -ge $this.MaxSteps) -or $outOfBounds -or $poleFell
        # NOTE(review): terminal steps earn 0 reward even when the cap is
        # reached successfully — presumably intentional; confirm vs. Gym.
        $reward = 0.0
        if (-not $done) { $reward = 1.0 }

        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================
# TRAINING RUNNER
# Types are instantiated HERE (script level) where NeuralNetwork
# and ExperienceReplay are already loaded by LoadAll.ps1
# Then injected into DQNAgent constructor.
# ============================================================
# ============================================================
# TRAINING RUNNER
# Types are instantiated HERE (script level) where NeuralNetwork
# and ExperienceReplay are already loaded by LoadAll.ps1
# Then injected into DQNAgent constructor.
# Returns the trained DQNAgent (wrapped as a 1-element array).
# ============================================================
function Invoke-DQNTraining {
    param(
        [int]    $Episodes    = 100,   # training episodes to run
        [int]    $PrintEvery  = 10,    # progress line cadence
        [switch] $Quiet,               # suppress per-episode progress
        [switch] $FastMode             # smaller net / shorter episodes
    )

    # ---- Settings ----
    $hiddenLayers  = @(64, 64)
    $batchSize     = 32
    $maxSteps      = 200
    $replayEvery   = 4        # Only train every N steps (huge speed win)

    if ($FastMode) {
        $hiddenLayers = @(16, 16)
        $batchSize    = 16
        $maxSteps     = 30
        $replayEvery  = 4
        # Shrink only if the caller left the defaults in place
        if ($Episodes -eq 100) { $Episodes  = 50 }
        if ($PrintEvery -eq 10) { $PrintEvery = 5 }
        Write-Host ""
        Write-Host "⚡ FAST MODE ENABLED" -ForegroundColor Yellow
        Write-Host " Hidden : 16 -> 16" -ForegroundColor Yellow
        Write-Host " Batch : $batchSize" -ForegroundColor Yellow
        Write-Host " MaxSteps : $maxSteps"  -ForegroundColor Yellow
        Write-Host " Episodes : $Episodes"  -ForegroundColor Yellow
    }

    Write-Host ""
    Write-Host "🚀 VBAF DQN Training Started" -ForegroundColor Green
    Write-Host " Episodes: $Episodes"        -ForegroundColor Cyan
    Write-Host ""

    # ---- Config ----
    $config                  = [DQNConfig]::new()
    $config.StateSize        = 4
    $config.ActionSize       = 2
    $config.HiddenLayers     = $hiddenLayers
    $config.LearningRate     = 0.001
    $config.Gamma            = 0.95
    $config.Epsilon          = 1.0
    $config.EpsilonMin       = 0.01
    $config.EpsilonDecay     = 0.995
    $config.BatchSize        = $batchSize
    $config.MemorySize       = 5000
    $config.TargetUpdateFreq = 10

    # ---- Build layer array: input -> hidden... -> output ----
    $layers = [System.Collections.Generic.List[int]]::new()
    $layers.Add($config.StateSize)
    foreach ($h in $config.HiddenLayers) { $layers.Add($h) }
    $layers.Add($config.ActionSize)
    $layerArray = $layers.ToArray()

    # ---- Instantiate at script level (PS 5.1 safe) ----
    $mainNetwork   = [NeuralNetwork]::new($layerArray, $config.LearningRate)
    $targetNetwork = [NeuralNetwork]::new($layerArray, $config.LearningRate)
    $memory        = [ExperienceReplay]::new($config.MemorySize)

    $agent = [DQNAgent]::new($config, $mainNetwork, $targetNetwork, $memory)

    $env          = [DQNEnvironment]::new()
    $env.MaxSteps = $maxSteps

    $bestReward = 0.0
    $stepCount  = 0

    for ($ep = 1; $ep -le $Episodes; $ep++) {
        $state       = $env.Reset()
        $totalReward = 0.0
        $done        = $false

        while (-not $done) {
            $action  = $agent.Act($state)
            $result  = $env.Step($action)
            $ns      = $result.NextState
            $reward  = $result.Reward
            $done    = $result.Done

            $agent.Remember($state, $action, $reward, $ns, $done)
            $stepCount++

            # Only replay every N steps - massive speed improvement
            if ($stepCount % $replayEvery -eq 0) {
                # FIX: Replay() returns [double]; an unassigned method call
                # emits that value to the output stream, polluting the
                # function's return. Discard it explicitly.
                $null = $agent.Replay()
            }

            $state        = $ns
            $totalReward += $reward
        }

        $agent.EndEpisode($totalReward)
        if ($totalReward -gt $bestReward) { $bestReward = $totalReward }

        if (-not $Quiet -and ($ep % $PrintEvery -eq 0)) {
            $stats = $agent.GetStats()
            Write-Host (" Ep {0,4} Reward: {1,5:F0} Best: {2,5:F0} e: {3:F3} Loss: {4:F5} Mem: {5}" -f `
                $ep, $totalReward, $bestReward,
                $stats.Epsilon, $stats.LastLoss, $stats.MemorySize) -ForegroundColor White
        }
    }

    Write-Host ""
    Write-Host "✅ Training Complete!" -ForegroundColor Green
    $agent.PrintStats()
    ,$agent  # comma operator forces return as single object in PS 5.1
}

# ============================================================
# TEST SUGGESTIONS
# ============================================================
# 1. BASIC LOAD TEST
# Run VBAF.LoadAll.ps1 - should see "📦 VBAF.RL.DQN.ps1 loaded"
#
# 2. FAST SMOKE TEST (seconds)
# $agent = (Invoke-DQNTraining -Episodes 5 -PrintEvery 1 -FastMode)[-1]
# Verify: DQNAgent created, episodes complete, stats print
#
# 3. STANDARD FAST TRAINING (2-3 minutes)
# $agent = (Invoke-DQNTraining -Episodes 50 -PrintEvery 5 -FastMode)[-1]
# Expect: Epsilon decays 1.0 -> ~0.24, Avg Reward > 15
#
# 4. BENCHMARK AGAINST RANDOM (requires VBAF.RL.Environment.ps1)
# $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200
# Invoke-VBAFBenchmark -Agent $agent -Environment $env -Episodes 10 -Label "DQN vs CartPole"
# Invoke-VBAFBenchmark -Environment $env -Episodes 10 -Label "Random Baseline"
# Expect: DQN Agent type shows as DQNAgent
#
# 5. INSPECT AGENT STATE
# $agent.GetStats()
# $agent.PrintStats()
# $agent.Epsilon # should be near EpsilonMin after full training
# $agent.Memory.Size() # should be > BatchSize (32) before replay kicks in
#
# 6. GET Q-VALUES FOR A STATE
# $state = @(0.1, 0.0, 0.05, 0.0) # sample CartPole state
# $agent.GetQValues($state) # shows Q-value for each action
# $agent.Predict($state) # greedy action (0 or 1)
#
# 7. COMPARE ALGORITHMS (after training PPO and A3C too)
# $dqn = Invoke-DQNTraining -Episodes 50 -PrintEvery 50 -FastMode -Quiet
# $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200
# Invoke-VBAFBenchmark -Agent $dqn -Environment $env -Episodes 20 -Label "DQN"
# ============================================================
# Load-confirmation banner: printed when this file is dot-sourced
# (e.g. by VBAF.LoadAll.ps1) so the user sees what became available.
Write-Host "📦 VBAF.RL.DQN.ps1 loaded" -ForegroundColor Green
Write-Host " Classes : DQNConfig, DQNAgent, DQNEnvironment" -ForegroundColor Cyan
Write-Host " Function: Invoke-DQNTraining"                  -ForegroundColor Cyan
Write-Host ""
Write-Host " Quick start:"                                                           -ForegroundColor Yellow
Write-Host ' $agent = (Invoke-DQNTraining -Episodes 100 -PrintEvery 10)[-1]'              -ForegroundColor White
Write-Host ' $agent = (Invoke-DQNTraining -Episodes 50 -PrintEvery 5 -FastMode)[-1]'      -ForegroundColor White
Write-Host ' $agent.PrintStats()'                                                    -ForegroundColor White
Write-Host ""