# VBAF
# 2.1.0
# VBAF.RL.Environment.ps1

                                #Requires -Version 5.1

<#

.SYNOPSIS

    Standardized Environment Interface for VBAF RL Algorithms

.DESCRIPTION

    Provides an OpenAI Gym-like environment interface for all VBAF RL algorithms.

    Replaces the individual DQNEnvironment, PPOEnvironment, A3CEnvironment classes.

    Features:

      - Standardized Reset(), Step(), GetState() interface

      - State/action space definitions

      - Reward shaping utilities

      - Pre-built environments: CartPole, GridWorld, RandomWalk

      - Environment wrappers: RewardShaper (StateNormalizer is named here but no such wrapper is defined in this script)

    All algorithms (DQN, PPO, A3C) can use any environment interchangeably.

.NOTES

    Part of VBAF - Phase 3 Reinforcement Learning Module

    PS 5.1 compatible

#>

# Record the directory this script lives in ($PSScriptRoot).
# NOTE(review): $basePath is not read anywhere else in this script; presumably
# other VBAF scripts that dot-source this file rely on it - confirm before removing.

$basePath = $PSScriptRoot

# ============================================================

# SPACE DEFINITIONS - describe state and action spaces

# ============================================================

# Describes an observation or action space: its kind, its size and the
# numeric bounds stored alongside it. No validation is performed on any field.
class VBAFSpace {
    [string] $Type    # "discrete" or "continuous"
    [int]    $Size    # number of actions OR number of state dimensions
    [double] $Low     # lower bound (continuous)
    [double] $High    # upper bound (continuous)

    # Creates a space with the default bounds [-1, 1].
    VBAFSpace([string]$type, [int]$size) {
        $this.Low  = -1.0
        $this.High =  1.0
        $this.Size = $size
        $this.Type = $type
    }

    # Creates a space with caller-supplied bounds.
    VBAFSpace([string]$type, [int]$size, [double]$low, [double]$high) {
        $this.High = $high
        $this.Low  = $low
        $this.Size = $size
        $this.Type = $type
    }

    # Renders the space as "<type>(<size>) [<low>, <high>]".
    [string] ToString() {
        $bounds = "[$($this.Low), $($this.High)]"
        return "$($this.Type)($($this.Size)) $bounds"
    }
}

# ============================================================

# BASE ENVIRONMENT - all environments inherit this interface

# ============================================================

# Base class for every VBAF environment. It stores the bookkeeping shared by
# all environments and supplies placeholder Reset/GetState/Step implementations
# that subclasses are expected to override.
class VBAFEnvironment {
    [string]     $Name              # display name of the environment
    [VBAFSpace]  $ObservationSpace  # left unset here; subclasses assign it
    [VBAFSpace]  $ActionSpace       # left unset here; subclasses assign it
    [int]        $Steps             # steps taken in the current episode
    [int]        $MaxSteps          # step limit per episode
    [double]     $TotalReward       # reward accumulated in the current episode
    [int]        $EpisodeCount      # incremented by subclass Reset() implementations

    # Stores the name and step limit and zeroes the counters.
    VBAFEnvironment([string]$name, [int]$maxSteps) {
        $this.Name         = $name
        $this.MaxSteps     = $maxSteps
        $this.Steps        = 0
        $this.TotalReward  = 0.0
        $this.EpisodeCount = 0
    }

    # Placeholder: returns a single-element zero state. Override in subclass.
    [double[]] Reset() { return @(0.0) }

    # Placeholder: returns a single-element zero state. Override in subclass.
    [double[]] GetState() { return @(0.0) }

    # Placeholder: ignores the action and reports a finished episode with zero
    # reward. Override in subclass.
    [hashtable] Step([int]$action) {
        return @{ NextState = @(0.0); Reward = 0.0; Done = $true }
    }

    # Writes a short summary of the environment to the host.
    [void] PrintInfo() {
        # The spaces are null on a bare base-class instance (and on any subclass
        # that has not assigned them); calling ToString() on null would raise an
        # error, so substitute a marker instead.
        $obsText = if ($null -ne $this.ObservationSpace) { $this.ObservationSpace.ToString() } else { '(not set)' }
        $actText = if ($null -ne $this.ActionSpace)      { $this.ActionSpace.ToString() }      else { '(not set)' }

        Write-Host "Environment : $($this.Name)"          -ForegroundColor Cyan
        Write-Host "Obs Space   : $obsText"               -ForegroundColor Cyan
        Write-Host "Act Space   : $actText"               -ForegroundColor Cyan
        Write-Host "Max Steps   : $($this.MaxSteps)"      -ForegroundColor Cyan
    }
}

# ============================================================

# CARTPOLE ENVIRONMENT

# Classic control problem - balance a pole on a cart

# State : [position, velocity, angle, angularVelocity]

# Actions: 0=left, 1=right

# ============================================================

# Cart-pole balancing task.
# State  : [position, velocity, angle, angularVelocity]
# Actions: 1 pushes with force +1.0, any other value pushes with force -1.0
class CartPoleEnvironment : VBAFEnvironment {
    [double] $Position
    [double] $Velocity
    [double] $Angle
    [double] $AngularVelocity
    hidden [System.Random] $Rng

    # Default: 200-step episodes, unseeded random generator.
    CartPoleEnvironment() : base("CartPole", 200) {
        $this.Initialize([System.Random]::new())
    }

    # Custom episode length, unseeded random generator.
    CartPoleEnvironment([int]$maxSteps) : base("CartPole", $maxSteps) {
        $this.Initialize([System.Random]::new())
    }

    # Custom episode length with a seeded random generator.
    CartPoleEnvironment([int]$maxSteps, [int]$seed) : base("CartPole", $maxSteps) {
        $this.Initialize([System.Random]::new($seed))
    }

    # Shared constructor body: declares the spaces, stores the generator and
    # performs the initial Reset() (which also bumps EpisodeCount to 1).
    hidden [void] Initialize([System.Random]$generator) {
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, -4.8, 4.8)
        $this.ActionSpace      = [VBAFSpace]::new("discrete",   2,  0.0, 1.0)
        $this.Rng              = $generator
        $null = $this.Reset()
    }

    # Starts a new episode: each state variable is drawn uniformly from
    # [-0.05, 0.05), the per-episode counters are cleared and EpisodeCount
    # is incremented. Returns the fresh state.
    [double[]] Reset() {
        # Draw order (position, velocity, angle, angular velocity) is kept so a
        # seeded generator yields the same start states.
        $startPosition = ($this.Rng.NextDouble() - 0.5) * 0.1
        $startVelocity = ($this.Rng.NextDouble() - 0.5) * 0.1
        $startAngle    = ($this.Rng.NextDouble() - 0.5) * 0.1
        $startAngVel   = ($this.Rng.NextDouble() - 0.5) * 0.1

        $this.Position        = $startPosition
        $this.Velocity        = $startVelocity
        $this.Angle           = $startAngle
        $this.AngularVelocity = $startAngVel

        $this.TotalReward   = 0.0
        $this.Steps         = 0
        $this.EpisodeCount += 1

        return $this.GetState()
    }

    # Returns [position, velocity, angle, angularVelocity].
    [double[]] GetState() {
        [double[]] $snapshot = $this.Position, $this.Velocity, $this.Angle, $this.AngularVelocity
        return $snapshot
    }

    # Applies one action and advances the simulation by one time step.
    # Returns @{ NextState; Reward; Done }. Reward is 1.0 while the episode
    # continues and 0.0 on the step that ends it.
    [hashtable] Step([int]$action) {
        $this.Steps++

        # Physical constants of the simulation.
        $dt        = 0.02
        $gravity   = 9.8
        $halfLen   = 0.25
        $poleMass  = 0.1
        $cartMass  = 1.0
        $totalMass = $cartMass + $poleMass
        $poleMassLen = $poleMass * $halfLen

        if ($action -eq 1) { $push = 1.0 } else { $push = -1.0 }

        $omega    = $this.AngularVelocity
        $sinTheta = [Math]::Sin($this.Angle)
        $cosTheta = [Math]::Cos($this.Angle)

        # Accelerations, evaluated in the same operand order as before so the
        # floating-point results are unchanged.
        $common      = ($push + $poleMassLen * $omega * $omega * $sinTheta) / $totalMass
        $angularAcc  = ($gravity * $sinTheta - $cosTheta * $common) / ($halfLen * (4.0/3.0 - $poleMass * $cosTheta * $cosTheta / $totalMass))
        $linearAcc   = $common - $poleMassLen * $angularAcc * $cosTheta / $totalMass

        # Positions advance with the *previous* velocities, then velocities update.
        $this.Position        += $dt * $this.Velocity
        $this.Velocity        += $dt * $linearAcc
        $this.Angle           += $dt * $this.AngularVelocity
        $this.AngularVelocity += $dt * $angularAcc

        $timedOut = $this.Steps -ge $this.MaxSteps
        $offTrack = [Math]::Abs($this.Position) -gt 2.4
        $toppled  = [Math]::Abs($this.Angle)    -gt 0.21
        $finished = $timedOut -or $offTrack -or $toppled

        if ($finished) { $stepReward = 0.0 } else { $stepReward = 1.0 }
        $this.TotalReward += $stepReward

        return @{ NextState = $this.GetState(); Reward = $stepReward; Done = $finished }
    }
}

# ============================================================

# GRIDWORLD ENVIRONMENT

# Simple grid navigation - agent finds goal avoiding walls

# State : [row, col, goalRow, goalCol] normalized 0-1

# Actions: 0=up, 1=right, 2=down, 3=left

# ============================================================

# Square grid navigation task: the agent walks toward a goal cell.
# State  : [row, col, goalRow, goalCol], each divided by (GridSize - 1)
# Actions: 0=up, 1=right, 2=down, 3=left (any other value leaves the agent in place)
class GridWorldEnvironment : VBAFEnvironment {
    [int] $GridSize
    [int] $AgentRow
    [int] $AgentCol
    [int] $GoalRow
    [int] $GoalCol
    hidden [System.Random] $Rng

    # Default: 5x5 grid, 100-step episodes, unseeded random generator.
    GridWorldEnvironment() : base("GridWorld", 100) {
        $this.Initialize(5, [System.Random]::new())
    }

    # Custom grid size and episode length, unseeded random generator.
    GridWorldEnvironment([int]$gridSize, [int]$maxSteps) : base("GridWorld", $maxSteps) {
        $this.Initialize($gridSize, [System.Random]::new())
    }

    # Custom grid size and episode length with a seeded random generator
    # (mirrors the seeded CartPoleEnvironment constructor).
    GridWorldEnvironment([int]$gridSize, [int]$maxSteps, [int]$seed) : base("GridWorld", $maxSteps) {
        $this.Initialize($gridSize, [System.Random]::new($seed))
    }

    # Shared constructor body. Throws ArgumentOutOfRangeException when the grid
    # has fewer than two cells per side.
    hidden [void] Initialize([int]$gridSize, [System.Random]$generator) {
        # With a 0- or 1-cell grid the agent and the goal can never differ, so
        # the re-roll loop in Reset() would never terminate.
        if ($gridSize -lt 2) {
            throw [System.ArgumentOutOfRangeException]::new(
                'gridSize', $gridSize,
                'GridSize must be at least 2 so the agent and the goal can occupy different cells.')
        }

        $this.GridSize         = $gridSize
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 4, 0.0, 1.0)
        $this.ActionSpace      = [VBAFSpace]::new("discrete",   4, 0.0, 3.0)
        $this.Rng              = $generator
        $null = $this.Reset()
    }

    # Starts a new episode: places the agent and the goal on random, distinct
    # cells, clears the per-episode counters and increments EpisodeCount.
    # Returns the fresh state.
    [double[]] Reset() {
        $this.AgentRow    = $this.Rng.Next(0, $this.GridSize)
        $this.AgentCol    = $this.Rng.Next(0, $this.GridSize)
        $this.GoalRow     = $this.Rng.Next(0, $this.GridSize)
        $this.GoalCol     = $this.Rng.Next(0, $this.GridSize)

        # Re-roll the goal until it no longer coincides with the agent.
        while ($this.AgentRow -eq $this.GoalRow -and $this.AgentCol -eq $this.GoalCol) {
            $this.GoalRow = $this.Rng.Next(0, $this.GridSize)
            $this.GoalCol = $this.Rng.Next(0, $this.GridSize)
        }

        $this.Steps       = 0
        $this.TotalReward = 0.0
        $this.EpisodeCount++

        return $this.GetState()
    }

    # Returns [row, col, goalRow, goalCol] scaled into [0, 1].
    [double[]] GetState() {
        # GridSize is at least 2 after construction, so the divisor is >= 1.
        [double] $span = $this.GridSize - 1
        [double[]] $state = @(
            ($this.AgentRow / $span),
            ($this.AgentCol / $span),
            ($this.GoalRow  / $span),
            ($this.GoalCol  / $span)
        )
        return $state
    }

    # Moves the agent one cell (clamped to the grid) and scores the move.
    # Returns @{ NextState; Reward; Done }.
    # Reward: 10.0 on reaching the goal, -1.0 when the step limit ends the
    # episode, -0.1 for every other step.
    [hashtable] Step([int]$action) {
        $this.Steps++

        $newRow = $this.AgentRow
        $newCol = $this.AgentCol

        switch ($action) {
            0 { $newRow-- }  # up
            1 { $newCol++ }  # right
            2 { $newRow++ }  # down
            3 { $newCol-- }  # left
        }

        # Moves into a wall are clamped, leaving the agent where it was.
        $lastIndex     = $this.GridSize - 1
        $this.AgentRow = [Math]::Max(0, [Math]::Min($lastIndex, $newRow))
        $this.AgentCol = [Math]::Max(0, [Math]::Min($lastIndex, $newCol))

        $atGoal = ($this.AgentRow -eq $this.GoalRow -and $this.AgentCol -eq $this.GoalCol)
        $done   = $atGoal -or ($this.Steps -ge $this.MaxSteps)
        $reward = if ($atGoal) { 10.0 } elseif ($done) { -1.0 } else { -0.1 }

        $this.TotalReward += $reward

        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================

# RANDOM WALK ENVIRONMENT

# Simple 1D walk - agent tries to reach center (0)

# State : [position] normalized

# Actions: 0=left, 1=right

# Good for quick algorithm sanity checks

# ============================================================

# One-dimensional walk: the agent tries to reach the center (position 0).
# State  : [Position / Range]
# Actions: 0 moves left (-1), any other value moves right (+1)
class RandomWalkEnvironment : VBAFEnvironment {
    [int]    $Position
    [int]    $Range
    hidden [System.Random] $Rng

    # Default: 50-step episodes, unseeded random generator.
    RandomWalkEnvironment() : base("RandomWalk", 50) {
        $this.Initialize([System.Random]::new())
    }

    # Custom episode length, unseeded random generator
    # (mirrors CartPoleEnvironment([int]$maxSteps)).
    RandomWalkEnvironment([int]$maxSteps) : base("RandomWalk", $maxSteps) {
        $this.Initialize([System.Random]::new())
    }

    # Custom episode length with a seeded random generator
    # (mirrors CartPoleEnvironment([int]$maxSteps, [int]$seed)).
    RandomWalkEnvironment([int]$maxSteps, [int]$seed) : base("RandomWalk", $maxSteps) {
        $this.Initialize([System.Random]::new($seed))
    }

    # Shared constructor body: fixes the range at 10, declares the spaces,
    # stores the generator and performs the initial Reset().
    hidden [void] Initialize([System.Random]$generator) {
        $this.Range            = 10
        $this.ObservationSpace = [VBAFSpace]::new("continuous", 1, -1.0, 1.0)
        $this.ActionSpace      = [VBAFSpace]::new("discrete",   2,  0.0, 1.0)
        $this.Rng              = $generator
        $null = $this.Reset()
    }

    # Starts a new episode at a random non-zero position in [-Range, Range],
    # clears the per-episode counters and increments EpisodeCount.
    # Returns the fresh state.
    [double[]] Reset() {
        do {
            # Random.Next excludes its upper bound, so +1 is needed for +Range
            # to be a possible start (previously only -Range was reachable).
            $this.Position = $this.Rng.Next(-$this.Range, $this.Range + 1)
        } while ($this.Position -eq 0)   # never start on the goal itself

        $this.Steps       = 0
        $this.TotalReward = 0.0
        $this.EpisodeCount++

        return $this.GetState()
    }

    # Returns the position divided by Range as a one-element array.
    [double[]] GetState() {
        return @([double]$this.Position / $this.Range)
    }

    # Moves one cell left or right (clamped to [-Range, Range]) and scores it.
    # Returns @{ NextState; Reward; Done }.
    # Reward: 10.0 on reaching position 0, otherwise -0.1 per cell of distance
    # from the center.
    [hashtable] Step([int]$action) {
        $this.Steps++

        if ($action -eq 0) { $this.Position-- } else { $this.Position++ }
        $this.Position = [Math]::Max(-$this.Range, [Math]::Min($this.Range, $this.Position))

        $atCenter = ($this.Position -eq 0)
        $done     = $atCenter -or ($this.Steps -ge $this.MaxSteps)
        $reward   = if ($atCenter) { 10.0 } else { -[Math]::Abs($this.Position) * 0.1 }

        $this.TotalReward += $reward

        return @{ NextState = $this.GetState(); Reward = $reward; Done = $done }
    }
}

# ============================================================

# ENVIRONMENT FACTORY - create environments by name

# ============================================================

function New-VBAFEnvironment {
    <#
    .SYNOPSIS
        Creates one of the built-in environments by name.
    .PARAMETER Name
        "CartPole", "GridWorld" or "RandomWalk" (matched case-insensitively).
    .PARAMETER MaxSteps
        Step limit per episode. For RandomWalk it is applied only when passed
        explicitly, so the environment's own default of 50 is kept otherwise.
    .PARAMETER GridSize
        Side length of the grid; used by GridWorld only.
    .PARAMETER Seed
        Seed for the environment's random generator. Negative values (the
        default) leave the generator unseeded.
    .OUTPUTS
        The environment instance, or $null when Name is not recognised (a
        message is written to the host in that case).
    #>
    param(
        [string] $Name     = "CartPole",
        [int]    $MaxSteps = 200,
        [int]    $GridSize = 5,
        [int]    $Seed     = -1
    )

    # Re-seeds an already constructed environment. Used for environments whose
    # two-/zero-argument constructors take no seed: swap in a seeded generator
    # and redo the initial Reset so the start state comes from that seed.
    # EpisodeCount is zeroed first so it ends at 1, as after construction.
    # Rng is a hidden member, which hides it from discovery but not from access.
    $applySeed = {
        param($target)
        $target.Rng          = [System.Random]::new($Seed)
        $target.EpisodeCount = 0
        $null = $target.Reset()
    }

    switch ($Name) {
        "CartPole" {
            if ($Seed -ge 0) {
                return [CartPoleEnvironment]::new($MaxSteps, $Seed)
            }
            return [CartPoleEnvironment]::new($MaxSteps)
        }
        "GridWorld" {
            $created = [GridWorldEnvironment]::new($GridSize, $MaxSteps)
            # Previously -Seed was silently ignored here.
            if ($Seed -ge 0) { & $applySeed $created }
            return $created
        }
        "RandomWalk" {
            $created = [RandomWalkEnvironment]::new()
            # Previously -MaxSteps was silently ignored here. Only override when
            # the caller supplied it; the parameter default (200) is not the
            # environment's default (50).
            if ($PSBoundParameters.ContainsKey('MaxSteps')) {
                $created.MaxSteps = $MaxSteps
            }
            # Previously -Seed was silently ignored here.
            if ($Seed -ge 0) { & $applySeed $created }
            return $created
        }
        default {
            Write-Host "❌ Unknown environment: $Name" -ForegroundColor Red
            Write-Host "   Available: CartPole, GridWorld, RandomWalk" -ForegroundColor Yellow
            return $null
        }
    }
}

# ============================================================

# REWARD SHAPER WRAPPER

# Wraps any VBAFEnvironment to modify rewards

# ============================================================

function New-RewardShaper {
    <#
    .SYNOPSIS
        Wraps an environment so that the rewards returned by Step are rescaled,
        penalised per step and optionally clipped.
    .PARAMETER Environment
        Object exposing Reset(), GetState() and Step(action) methods.
    .PARAMETER Scale
        Multiplier applied to the raw reward.
    .PARAMETER Clip
        When greater than 0, the shaped reward is clamped to [-Clip, Clip].
        0 (the default) disables clipping.
    .PARAMETER StepPenalty
        Amount subtracted from the scaled reward on every step.
    .OUTPUTS
        Hashtable with the settings (Env, Scale, Clip, StepPenalty) and three
        script blocks (Reset, GetState, Step). Invoke them with the call
        operator, e.g.  & $shaper.Step 1
    #>
    param(
        [object]    $Environment,
        [double]    $Scale       = 1.0,
        [double]    $Clip        = 0.0,   # 0 = no clipping
        [double]    $StepPenalty = 0.0    # penalty per step
    )

    # A plain script block does not capture the function's local variables: by
    # the time the caller invokes it, $Environment/$Scale/$Clip/$StepPenalty
    # would be looked up in the caller's scope (normally unset). GetNewClosure()
    # binds the current values to each block.
    $resetBlock = { $Environment.Reset() }.GetNewClosure()

    $stateBlock = { $Environment.GetState() }.GetNewClosure()

    $stepBlock  = {
        param([int]$action)

        $result = $Environment.Step($action)
        $r      = $result.Reward * $Scale - $StepPenalty

        if ($Clip -gt 0) {
            $r = [Math]::Max(-$Clip, [Math]::Min($Clip, $r))
        }

        return @{ NextState = $result.NextState; Reward = $r; Done = $result.Done }
    }.GetNewClosure()

    return @{
        Env         = $Environment
        Scale       = $Scale
        Clip        = $Clip
        StepPenalty = $StepPenalty
        Reset       = $resetBlock
        GetState    = $stateBlock
        Step        = $stepBlock
    }
}

# ============================================================

# BENCHMARKING UTILITY

# Run any algorithm on any environment and measure performance

# ============================================================

function Invoke-VBAFBenchmark {
    <#
    .SYNOPSIS
        Runs an agent (or random actions) on an environment for a number of
        episodes and reports reward statistics and timing.
    .PARAMETER Agent
        Object exposing Predict(state). When $null, actions are chosen uniformly
        at random. If Predict throws, a random action is used for that step and
        a single warning is written.
    .PARAMETER Environment
        Object exposing Reset(), Step(action) and ActionSpace.Size. Required.
    .PARAMETER Episodes
        Number of episodes to run; must be at least 1.
    .PARAMETER Label
        Title shown in the printed report.
    .OUTPUTS
        Hashtable @{ Avg; Max; Min; TimeMs }.
    #>
    param(
        [object] $Agent,
        [object] $Environment,
        [int]    $Episodes = 10,
        [string] $Label    = "Benchmark"
    )

    # Without an environment every Step call fails and Done never becomes true,
    # so the episode loop would spin forever. Fail fast instead.
    if ($null -eq $Environment) {
        throw [System.ArgumentNullException]::new('Environment', 'An environment is required to run a benchmark.')
    }

    # Zero episodes would divide by zero in the ms/episode line and leave the
    # statistics empty.
    if ($Episodes -lt 1) {
        throw [System.ArgumentOutOfRangeException]::new('Episodes', $Episodes, 'Episodes must be at least 1.')
    }

    Write-Host ""
    Write-Host "⏱️  $Label" -ForegroundColor Yellow
    Write-Host "   Episodes : $Episodes" -ForegroundColor Cyan

    $rewards    = [System.Collections.Generic.List[double]]::new()
    $timer      = [System.Diagnostics.Stopwatch]::StartNew()
    $rng        = [System.Random]::new()
    $actionSize = $Environment.ActionSpace.Size
    $useRandom  = ($null -eq $Agent)

    if ($useRandom) {
        Write-Host "   Agent    : Random (no agent provided)" -ForegroundColor DarkYellow
    } else {
        Write-Host "   Agent    : $($Agent.GetType().Name)" -ForegroundColor DarkYellow
    }

    $predictWarned = $false   # warn about Predict failures once, not every step

    for ($ep = 1; $ep -le $Episodes; $ep++) {
        $state       = $Environment.Reset()
        $totalReward = 0.0
        $done        = $false

        while (-not $done) {
            if ($useRandom) {
                $action = $rng.Next(0, $actionSize)
            } else {
                try {
                    $action = $Agent.Predict($state)
                } catch {
                    # Best-effort fallback is kept, but no longer silent: the
                    # reported numbers would otherwise look like the agent's.
                    if (-not $predictWarned) {
                        Write-Warning "Agent.Predict failed ($($_.Exception.Message)); falling back to random actions."
                        $predictWarned = $true
                    }
                    $action = $rng.Next(0, $actionSize)
                }
            }

            $result = $Environment.Step($action)

            # A missing result has no Done flag, which would loop forever.
            if ($null -eq $result) {
                throw [System.InvalidOperationException]::new('Environment.Step returned no result; cannot continue the episode.')
            }

            $state        = $result.NextState
            $totalReward += $result.Reward
            $done         = $result.Done
        }

        $rewards.Add($totalReward)
    }

    $timer.Stop()

    # One pass over the rewards instead of three.
    $stats = $rewards | Measure-Object -Average -Maximum -Minimum
    $avg   = $stats.Average
    $max   = $stats.Maximum
    $min   = $stats.Minimum
    $ms    = $timer.ElapsedMilliseconds

    Write-Host ""
    Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Yellow
    Write-Host ("║  {0,-36}║" -f $Label)                  -ForegroundColor Yellow
    Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Yellow
    Write-Host ("║  Avg Reward : {0,-23}║" -f [Math]::Round($avg, 2)) -ForegroundColor White
    Write-Host ("║  Max Reward : {0,-23}║" -f [Math]::Round($max, 2)) -ForegroundColor Green
    Write-Host ("║  Min Reward : {0,-23}║" -f [Math]::Round($min, 2)) -ForegroundColor White
    Write-Host ("║  Time (ms)  : {0,-23}║" -f $ms)                    -ForegroundColor Cyan
    Write-Host ("║  ms/episode : {0,-23}║" -f [Math]::Round($ms / $Episodes, 1)) -ForegroundColor Cyan
    Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Yellow
    Write-Host ""

    return @{ Avg = $avg; Max = $max; Min = $min; TimeMs = $ms }
}

# ============================================================

# TEST

# 1. Run VBAF.LoadAll.ps1

# 2. $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200

# 3. $env = New-VBAFEnvironment -Name "GridWorld" -GridSize 5 -MaxSteps 100

# 4. $env = New-VBAFEnvironment -Name "RandomWalk"

# 5. $env.PrintInfo()

# 6. After training: Invoke-VBAFBenchmark -Agent $agent -Environment $env -Episodes 10

# NOTE: Always capture agent with [-1]: $agent = (Invoke-DQNTraining ...)[-1]

# ============================================================

# Load banner: announces what this script defines and shows a quick-start hint.
# Runs in a child scope so the helper variables do not leak into the scope that
# dot-sources this file.
& {
    $bannerLines = @(
        @{ Text = "📦 VBAF.RL.Environment.ps1 loaded";                              Color = 'Green'  }
        @{ Text = "   Classes   : VBAFSpace, VBAFEnvironment";                      Color = 'Cyan'   }
        @{ Text = "   Environments: CartPole, GridWorld, RandomWalk";               Color = 'Cyan'   }
        @{ Text = "   Functions : New-VBAFEnvironment";                             Color = 'Cyan'   }
        @{ Text = "              New-RewardShaper";                                 Color = 'Cyan'   }
        @{ Text = "              Invoke-VBAFBenchmark";                             Color = 'Cyan'   }
        @{ Text = "" }
        @{ Text = "   Quick start:";                                                Color = 'Yellow' }
        @{ Text = '   $env = New-VBAFEnvironment -Name "CartPole" -MaxSteps 200';   Color = 'White'  }
        @{ Text = '   $env.PrintInfo()';                                            Color = 'White'  }
        @{ Text = "" }
    )

    foreach ($entry in $bannerLines) {
        if ($entry.ContainsKey('Color')) {
            Write-Host $entry.Text -ForegroundColor $entry.Color
        } else {
            # Blank spacer lines are written without a colour, as before.
            Write-Host $entry.Text
        }
    }
}