#Requires -Version 5.1
<#
.SYNOPSIS
    Standardized Environment Interface for VBAF RL Algorithms
.DESCRIPTION
    Provides an OpenAI Gym-like environment interface for all VBAF RL algorithms.
    Replaces the individual DQNEnvironment, PPOEnvironment, A3CEnvironment classes.

    Features:
    - Standardized Reset(), Step(), GetState() interface
    - State/action space definitions
    - Reward shaping utilities
    - Pre-built environments: CartPole, GridWorld, RandomWalk
    - Environment wrappers: RewardShaper, StateNormalizer

    All algorithms (DQN, PPO, A3C) can use any environment interchangeably.
.NOTES
    Part of VBAF - Phase 3 Reinforcement Learning Module
    PS 5.1 compatible
#>

# Set base path (kept for consistency with other VBAF modules that dot-source
# sibling files relative to this script's location)
$basePath = $PSScriptRoot

# ============================================================
# SPACE DEFINITIONS - describe state and action spaces
# ============================================================
class VBAFSpace {
    [string] $Type   # "discrete" or "continuous"
    [int]    $Size   # number of actions OR state dimensions
    [double] $Low    # min value (continuous)
    [double] $High   # max value (continuous)

    # Default bounds [-1, 1] when only type/size are given.
    VBAFSpace([string]$type, [int]$size) {
        $this.Type = $type
        $this.Size = $size
        $this.Low  = -1.0
        $this.High = 1.0
    }

    VBAFSpace([string]$type, [int]$size, [double]$low, [double]$high) {
        $this.Type = $type
        $this.Size = $size
        $this.Low  = $low
        $this.High = $high
    }

    # e.g. "discrete(2) [0, 1]"
    [string] ToString() {
        return "$($this.Type)($($this.Size)) [$($this.Low), $($this.High)]"
    }
}

# ============================================================
# BASE ENVIRONMENT - all environments inherit this interface
# ============================================================
class VBAFEnvironment {
    [string]    $Name
    [VBAFSpace] $ObservationSpace
    [VBAFSpace] $ActionSpace
    [int]       $Steps          # steps taken in current episode
    [int]       $MaxSteps       # episode length cap
    [double]    $TotalReward    # reward accumulated in current episode
    [int]       $EpisodeCount   # number of Reset() calls so far

    VBAFEnvironment([string]$name, [int]$maxSteps) {
        $this.Name         = $name
        $this.MaxSteps     = $maxSteps
        $this.Steps        = 0
        $this.TotalReward  = 0.0
        $this.EpisodeCount = 0
    }

    # Override in subclass: start a new episode and return the initial state.
    [double[]] Reset() { return @(0.0) }

    # Override in subclass: return the current state vector.
    [double[]] GetState() { return @(0.0) }

    # Override in subclass: apply an action; returns a hashtable with
    # NextState (double[]), Reward (double), Done (bool).
    [hashtable] Step([int]$action) {
        return @{ NextState = @(0.0); Reward = 0.0; Done = $true }
    }

    # Print a short human-readable summary of the environment.
    [void] PrintInfo() {
        Write-Host "Environment : $($this.Name)" -ForegroundColor Cyan
        Write-Host "Obs Space   : $($this.ObservationSpace.ToString())" -ForegroundColor Cyan
        Write-Host "Act Space   : $($this.ActionSpace.ToString())" -ForegroundColor Cyan
        Write-Host "Max Steps   : $($this.MaxSteps)" -ForegroundColor Cyan
    }
}

# ============================================================
# CARTPOLE ENVIRONMENT
# Classic control problem - balance a pole on a cart
# State  : [position, velocity, angle, angularVelocity]
# Actions: 0=left, 1=right
# ============================================================
class CartPoleEnvironment : VBAFEnvironment {
    [double] $Position
    [double] $Velocity
    [double] $Angle
    [double] $AngularVelocity
    hidden [System.Random] $Rng

    CartPoleEnvironment() : base("CartPole", 200) {
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, -4.8, 4.8)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 2, 0.0, 1.0)
        $this.Rng              = [System.Random]::new()
        $this.Reset()
    }

    CartPoleEnvironment([int]$maxSteps) : base("CartPole", $maxSteps) {
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, -4.8, 4.8)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 2, 0.0, 1.0)
        $this.Rng              = [System.Random]::new()
        $this.Reset()
    }

    # Seeded constructor for reproducible episodes.
    CartPoleEnvironment([int]$maxSteps, [int]$seed) : base("CartPole", $maxSteps) {
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, -4.8, 4.8)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 2, 0.0, 1.0)
        $this.Rng              = [System.Random]::new($seed)
        $this.Reset()
    }

    # Start a new episode with all four state variables drawn from [-0.05, 0.05).
    [double[]] Reset() {
        $this.Position        = ($this.Rng.NextDouble() - 0.5) * 0.1
        $this.Velocity        = ($this.Rng.NextDouble() - 0.5) * 0.1
        $this.Angle           = ($this.Rng.NextDouble() - 0.5) * 0.1
        $this.AngularVelocity = ($this.Rng.NextDouble() - 0.5) * 0.1
        $this.Steps       = 0
        $this.TotalReward = 0.0
        $this.EpisodeCount++
        return $this.GetState()
    }

    [double[]] GetState() {
        return @($this.Position, $this.Velocity, $this.Angle, $this.AngularVelocity)
    }

    # One physics step. Reward is +1 per non-terminal step; episode ends when
    # the cart leaves [-2.4, 2.4], the pole tips past ~12 degrees (0.21 rad),
    # or MaxSteps is reached.
    [hashtable] Step([int]$action) {
        $this.Steps++
        $force = if ($action -eq 1) { 1.0 } else { -1.0 }

        # Classic cart-pole dynamics constants (Euler-integrated below)
        $gravity   = 9.8
        $cartMass  = 1.0
        $poleMass  = 0.1
        $totalMass = $cartMass + $poleMass
        $halfLen   = 0.25
        $dt        = 0.02

        $cosA = [Math]::Cos($this.Angle)
        $sinA = [Math]::Sin($this.Angle)
        $temp = ($force + $poleMass * $halfLen * $this.AngularVelocity * $this.AngularVelocity * $sinA) / $totalMass
        $aAcc = ($gravity * $sinA - $cosA * $temp) / ($halfLen * (4.0/3.0 - $poleMass * $cosA * $cosA / $totalMass))
        $acc  = $temp - $poleMass * $halfLen * $aAcc * $cosA / $totalMass

        # Euler integration of cart and pole state
        $this.Position        += $dt * $this.Velocity
        $this.Velocity        += $dt * $acc
        $this.Angle           += $dt * $this.AngularVelocity
        $this.AngularVelocity += $dt * $aAcc

        $done = ($this.Steps -ge $this.MaxSteps) -or
                ([Math]::Abs($this.Position) -gt 2.4) -or
                ([Math]::Abs($this.Angle) -gt 0.21)
        $reward = if (-not $done) { 1.0 } else { 0.0 }
        $this.TotalReward += $reward
        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================
# GRIDWORLD ENVIRONMENT
# Simple grid navigation - agent finds goal avoiding walls
# State  : [row, col, goalRow, goalCol] normalized 0-1
# Actions: 0=up, 1=right, 2=down, 3=left
# ============================================================
class GridWorldEnvironment : VBAFEnvironment {
    [int] $GridSize
    [int] $AgentRow
    [int] $AgentCol
    [int] $GoalRow
    [int] $GoalCol
    hidden [System.Random] $Rng

    GridWorldEnvironment() : base("GridWorld", 100) {
        $this.GridSize         = 5
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, 0.0, 1.0)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 4, 0.0, 3.0)
        $this.Rng              = [System.Random]::new()
        $this.Reset()
    }

    GridWorldEnvironment([int]$gridSize, [int]$maxSteps) : base("GridWorld", $maxSteps) {
        $this.GridSize         = $gridSize
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, 0.0, 1.0)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 4, 0.0, 3.0)
        $this.Rng              = [System.Random]::new()
        $this.Reset()
    }

    # Place agent and goal at random distinct cells and start a new episode.
    [double[]] Reset() {
        $this.AgentRow = $this.Rng.Next(0, $this.GridSize)
        $this.AgentCol = $this.Rng.Next(0, $this.GridSize)
        $this.GoalRow  = $this.Rng.Next(0, $this.GridSize)
        $this.GoalCol  = $this.Rng.Next(0, $this.GridSize)

        # Make sure agent and goal are not the same cell.
        # BUGFIX: on a 1x1 grid there is only one cell, so the original re-roll
        # loop would never terminate; skip separation for degenerate grids.
        if ($this.GridSize -gt 1) {
            while ($this.AgentRow -eq $this.GoalRow -and $this.AgentCol -eq $this.GoalCol) {
                $this.GoalRow = $this.Rng.Next(0, $this.GridSize)
                $this.GoalCol = $this.Rng.Next(0, $this.GridSize)
            }
        }

        $this.Steps       = 0
        $this.TotalReward = 0.0
        $this.EpisodeCount++
        return $this.GetState()
    }

    # Returns [agentRow, agentCol, goalRow, goalCol] each normalized to [0, 1].
    [double[]] GetState() {
        # BUGFIX: clamp divisor to >= 1 so a 1x1 grid does not divide by zero.
        [int] $g = [Math]::Max(1, $this.GridSize - 1)
        [double[]] $arr = @(0.0, 0.0, 0.0, 0.0)
        $arr[0] = $this.AgentRow
        $arr[1] = $this.AgentCol
        $arr[2] = $this.GoalRow
        $arr[3] = $this.GoalCol
        $arr[0] /= $g
        $arr[1] /= $g
        $arr[2] /= $g
        $arr[3] /= $g
        return $arr
    }

    # Move one cell (clamped at walls). Rewards: +10 at goal, -1 on timeout,
    # -0.1 per ordinary step to encourage short paths.
    [hashtable] Step([int]$action) {
        $this.Steps++
        $newRow = $this.AgentRow
        $newCol = $this.AgentCol
        switch ($action) {
            0 { $newRow-- } # up
            1 { $newCol++ } # right
            2 { $newRow++ } # down
            3 { $newCol-- } # left
        }
        # Clamp to grid
        $newRow = [Math]::Max(0, [Math]::Min($this.GridSize - 1, $newRow))
        $newCol = [Math]::Max(0, [Math]::Min($this.GridSize - 1, $newCol))
        $this.AgentRow = $newRow
        $this.AgentCol = $newCol

        $atGoal = ($this.AgentRow -eq $this.GoalRow -and $this.AgentCol -eq $this.GoalCol)
        $done   = $atGoal -or ($this.Steps -ge $this.MaxSteps)
        $reward = if ($atGoal) { 10.0 } elseif ($done) { -1.0 } else { -0.1 }
        $this.TotalReward += $reward
        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================
# RANDOM WALK ENVIRONMENT
# Simple 1D walk - agent tries to reach center (0)
# State  : [position] normalized
# Actions: 0=left, 1=right
# Good for quick algorithm sanity checks
# ============================================================
class RandomWalkEnvironment : VBAFEnvironment {
    [int] $Position
    [int] $Range
    hidden [System.Random] $Rng

    RandomWalkEnvironment() : base("RandomWalk", 50) {
        $this.Range            = 10
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 1, -1.0, 1.0)
        $this.ActionSpace      = [VBAFSpace]::new("discrete", 2, 0.0, 1.0)
        $this.Rng              = [System.Random]::new()
        $this.Reset()
    }

    # Start a new episode at a random position in [-Range, Range).
    [double[]] Reset() {
        $this.Position    = $this.Rng.Next(-$this.Range, $this.Range)
        $this.Steps       = 0
        $this.TotalReward = 0.0
        $this.EpisodeCount++
        return $this.GetState()
    }

    # Single-element state: position scaled to [-1, 1].
    [double[]] GetState() {
        return @([double]$this.Position / $this.Range)
    }

    # Move one unit (clamped at +/-Range). Reward is +10 at center, otherwise
    # a penalty proportional to distance from center.
    [hashtable] Step([int]$action) {
        $this.Steps++
        if ($action -eq 0) { $this.Position-- } else { $this.Position++ }
        $this.Position = [Math]::Max(-$this.Range, [Math]::Min($this.Range, $this.Position))

        $atCenter = ($this.Position -eq 0)
        $done     = $atCenter -or ($this.Steps -ge $this.MaxSteps)
        $reward   = if ($atCenter) { 10.0 } else { -[Math]::Abs($this.Position) * 0.1 }
        $this.TotalReward += $reward
        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================
# ENVIRONMENT FACTORY - create environments by name
# ============================================================
function New-VBAFEnvironment {
    <#
    .SYNOPSIS
        Creates a VBAF environment by name ("CartPole", "GridWorld", "RandomWalk").
    .PARAMETER Seed
        Optional RNG seed (>= 0) for reproducibility; currently honored by CartPole only.
    .OUTPUTS
        A VBAFEnvironment subclass instance, or $null for an unknown name.
    #>
    param(
        [string] $Name = "CartPole",
        [int] $MaxSteps = 200,
        [int] $GridSize = 5,
        [int] $Seed = -1
    )
    switch ($Name) {
        "CartPole" {
            if ($Seed -ge 0) { return [CartPoleEnvironment]::new($MaxSteps, $Seed) }
            return [CartPoleEnvironment]::new($MaxSteps)
        }
        "GridWorld" {
            return [GridWorldEnvironment]::new($GridSize, $MaxSteps)
        }
        "RandomWalk" {
            return [RandomWalkEnvironment]::new()
        }
        default {
            Write-Host "❌ Unknown environment: $Name" -ForegroundColor Red
            Write-Host "   Available: CartPole, GridWorld, RandomWalk" -ForegroundColor Yellow
            return $null
        }
    }
}

# ============================================================
# REWARD SHAPER WRAPPER
# Wraps any VBAFEnvironment to modify rewards
# ============================================================
function New-RewardShaper {
    <#
    .SYNOPSIS
        Wraps an environment so each reward is scaled, step-penalized, and
        optionally clipped to [-Clip, Clip].
    .OUTPUTS
        Hashtable exposing Reset/GetState/Step scriptblocks mirroring the
        environment interface (invoke with e.g. `& $shaper.Step 1`).
    #>
    param(
        [object] $Environment,
        [double] $Scale = 1.0,
        [double] $Clip = 0.0,        # 0 = no clipping
        [double] $StepPenalty = 0.0  # penalty per step
    )
    # BUGFIX: PowerShell scriptblocks do NOT capture enclosing locals by default,
    # so without GetNewClosure() $Environment/$Scale/$Clip/$StepPenalty would be
    # undefined when the caller invokes these blocks after the function returns.
    return @{
        Env         = $Environment
        Scale       = $Scale
        Clip        = $Clip
        StepPenalty = $StepPenalty
        Reset       = { $Environment.Reset() }.GetNewClosure()
        GetState    = { $Environment.GetState() }.GetNewClosure()
        Step        = {
            param([int]$action)
            $result = $Environment.Step($action)
            $r = $result.Reward * $Scale - $StepPenalty
            if ($Clip -gt 0) { $r = [Math]::Max(-$Clip, [Math]::Min($Clip, $r)) }
            return @{ NextState = $result.NextState; Reward = $r; Done = $result.Done }
        }.GetNewClosure()
    }
}

# ============================================================
# BENCHMARKING UTILITY
# Run any algorithm on any environment and measure performance
# ============================================================
function Invoke-VBAFBenchmark {
    <#
    .SYNOPSIS
        Runs an agent (or a random policy when -Agent is $null) on an
        environment for N episodes and reports reward/timing statistics.
    .OUTPUTS
        Hashtable with Avg, Max, Min, TimeMs.
    #>
    param(
        [object] $Agent,
        [object] $Environment,
        [int] $Episodes = 10,
        [string] $Label = "Benchmark"
    )
    Write-Host ""
    Write-Host "⏱️ $Label" -ForegroundColor Yellow
    Write-Host "   Episodes : $Episodes" -ForegroundColor Cyan

    $rewards = [System.Collections.Generic.List[double]]::new()
    $timer = [System.Diagnostics.Stopwatch]::StartNew()
    $rng = [System.Random]::new()
    $actionSize = $Environment.ActionSpace.Size
    $useRandom = ($null -eq $Agent)

    if ($useRandom) {
        Write-Host "   Agent    : Random (no agent provided)" -ForegroundColor DarkYellow
    } else {
        Write-Host "   Agent    : $($Agent.GetType().Name)" -ForegroundColor DarkYellow
    }

    for ($ep = 1; $ep -le $Episodes; $ep++) {
        $state = $Environment.Reset()
        $totalReward = 0.0
        $done = $false
        while (-not $done) {
            if ($useRandom) {
                $action = $rng.Next(0, $actionSize)
            } else {
                # Fall back to a random action if the agent's Predict throws
                # (keeps the benchmark best-effort rather than aborting).
                try { $action = $Agent.Predict($state) }
                catch { $action = $rng.Next(0, $actionSize) }
            }
            $result = $Environment.Step($action)
            $state = $result.NextState
            $totalReward += $result.Reward
            $done = $result.Done
        }
        $rewards.Add($totalReward)
    }
    $timer.Stop()

    $avg = ($rewards | Measure-Object -Average).Average
    $max = ($rewards | Measure-Object -Maximum).Maximum
    $min = ($rewards | Measure-Object -Minimum).Minimum
    $ms  = $timer.ElapsedMilliseconds

    Write-Host ""
    Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Yellow
    Write-Host ("║ {0,-36}║" -f $Label) -ForegroundColor Yellow
    Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Yellow
    Write-Host ("║ Avg Reward : {0,-23}║" -f [Math]::Round($avg, 2)) -ForegroundColor White
    Write-Host ("║ Max Reward : {0,-23}║" -f [Math]::Round($max, 2)) -ForegroundColor Green
    Write-Host ("║ Min Reward : {0,-23}║" -f [Math]::Round($min, 2)) -ForegroundColor White
    Write-Host ("║ Time (ms)  : {0,-23}║" -f $ms) -ForegroundColor Cyan
    # BUGFIX: guard against -Episodes 0 (divide-by-zero)
    Write-Host ("║ ms/episode : {0,-23}║" -f [Math]::Round($ms / [Math]::Max(1, $Episodes), 1)) -ForegroundColor Cyan
    Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Yellow
    Write-Host ""
    return @{ Avg = $avg; Max = $max; Min = $min; TimeMs = $ms }
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
# 2. $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200
# 3. $env = New-VBAFEnvironment -Name "GridWorld" -GridSize 5 -MaxSteps 100
# 4. $env = New-VBAFEnvironment -Name "RandomWalk"
# 5. $env.PrintInfo()
# 6. After training: Invoke-VBAFBenchmark -Agent $agent -Environment $env -Episodes 10
# NOTE: Always capture agent with [-1]: $agent = (Invoke-DQNTraining ...)[-1]
# ============================================================

Write-Host "📦 VBAF.RL.Environment.ps1 loaded" -ForegroundColor Green
Write-Host "   Classes     : VBAFSpace, VBAFEnvironment" -ForegroundColor Cyan
Write-Host "   Environments: CartPole, GridWorld, RandomWalk" -ForegroundColor Cyan
Write-Host "   Functions   : New-VBAFEnvironment" -ForegroundColor Cyan
Write-Host "                 New-RewardShaper" -ForegroundColor Cyan
Write-Host "                 Invoke-VBAFBenchmark" -ForegroundColor Cyan
Write-Host ""
Write-Host "   Quick start:" -ForegroundColor Yellow
Write-Host '   $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200' -ForegroundColor White
Write-Host '   $env.PrintInfo()' -ForegroundColor White
Write-Host ""