VBAF.Enterprise.SupplyChain.ps1
|
#Requires -Version 5.1
<#
.SYNOPSIS
    Pillar 7 - Multi-Agent Strategy: Intelligent Supply Chain
.DESCRIPTION
    Trains a DQN agent to optimize inventory ordering strategy.
    The agent observes market conditions and learns when to:
      - OrderSmall  : order small stock  (action 0)
      - OrderMedium : order medium stock (action 1)
      - OrderLarge  : order large stock  (action 2)
      - Hold        : do not order       (action 3)
    Goal: maximize profit by balancing stock vs demand vs cost.
.NOTES
    Part of VBAF - Phase 9 Enterprise Automation Engine
    Pillar 7: Multi-Agent Strategy Modeling
    PS 5.1 compatible
#>

# ============================================================
# PILLAR 7 - MULTI-AGENT STRATEGY: SUPPLY CHAIN
# ============================================================
function Invoke-VBAFSupplyChainTraining {
    <#
    .SYNOPSIS
        Benchmarks a baseline, trains a DQN agent on the "SupplyChain"
        environment, then benchmarks the trained agent.
    .PARAMETER Episodes
        Number of training episodes (default 100).
    .PARAMETER PrintEvery
        Print a progress line every N episodes (default 20).
        Values of 0 or less disable progress output.
    .PARAMETER FastMode
        Caps the number of training episodes at 50.
    .OUTPUTS
        Hashtable with keys Agent, Results, Baseline and Trained.
        Results is a list holding one hashtable per training episode
        (Episode, Reward, Small, Medium, Large, Hold, Epsilon, StockOut).
    .NOTES
        Relies on New-EnterpriseEnvironment, Invoke-VBAFBenchmark and the
        DQNConfig / NeuralNetwork / ExperienceReplay / DQNAgent types, which
        are not defined in this file and must already be loaded.
    #>
    param(
        [int]    $Episodes   = 100,
        [int]    $PrintEvery = 20,
        [switch] $FastMode
    )

    Write-Host ""
    Write-Host "🏢 VBAF Enterprise - Pillar 7: Multi-Agent Strategy" -ForegroundColor Cyan
    Write-Host " Training DQN agent on Supply Chain..." -ForegroundColor Cyan
    Write-Host " Actions: 0=OrderSmall 1=OrderMedium 2=OrderLarge 3=Hold" -ForegroundColor Yellow
    Write-Host " Goal : maximize profit - balance stock vs demand vs cost" -ForegroundColor Yellow
    Write-Host ""

    $scEnv = New-EnterpriseEnvironment -Name "SupplyChain"

    # Phase 1: reference benchmark run without an agent.
    Write-Host " Phase 1: Baseline (random agent)..." -ForegroundColor Gray
    $baseline = Invoke-VBAFBenchmark -Environment $scEnv -Episodes 10 -Label "Baseline Random"

    if ($FastMode) { $Episodes = [Math]::Min($Episodes, 50) }

    Write-Host ""
    Write-Host " Phase 2: Training DQN agent ($Episodes episodes)..." -ForegroundColor Gray

    # Build DQN - 4 state, 4 actions
    $config              = [DQNConfig]::new()
    $config.StateSize    = 4       # inventory, demand, price, backlog
    $config.ActionSize   = 4       # OrderSmall, OrderMedium, OrderLarge, Hold
    $config.EpsilonDecay = 0.9995
    $config.EpsilonMin   = 0.05

    [int[]] $arch  = @(4, 16, 16, 4)
    $mainNetwork   = [NeuralNetwork]::new($arch, $config.LearningRate)
    $targetNetwork = [NeuralNetwork]::new($arch, $config.LearningRate)
    $memory        = [ExperienceReplay]::new($config.MemorySize)
    $agent         = [DQNAgent]::new($config, $mainNetwork, $targetNetwork, $memory)

    $results = [System.Collections.Generic.List[object]]::new()

    for ($ep = 1; $ep -le $Episodes; $ep++) {
        $state      = $scEnv.Reset()
        $done       = $false
        $epReward   = 0.0
        $smallCount = 0
        $medCount   = 0
        $largeCount = 0
        $holdCount  = 0
        [int] $stepCount = 0

        while (-not $done) {
            $action = $agent.Act($state)
            $result = $scEnv.Step($action)
            $agent.Remember($state, $action, $result.Reward, $result.NextState, $result.Done)

            # Replay is triggered on every 4th environment step only.
            $stepCount++
            if ($stepCount % 4 -eq 0) { $agent.Replay() }

            $state     = $result.NextState
            $done      = $result.Done
            $epReward += $result.Reward

            # Per-episode tally of the actions taken.
            switch ($action) {
                0 { $smallCount++ }
                1 { $medCount++ }
                2 { $largeCount++ }
                3 { $holdCount++ }
            }
        }

        $agent.EndEpisode($epReward)

        $results.Add(@{
            Episode  = $ep
            Reward   = $epReward
            Small    = $smallCount
            Medium   = $medCount
            Large    = $largeCount
            Hold     = $holdCount
            Epsilon  = $agent.Epsilon
            StockOut = $scEnv.StockOuts
        })

        # Guard against -PrintEvery 0 (modulo by zero) and negative values
        # (rejected by Select-Object -Last): both simply disable progress output.
        if ($PrintEvery -gt 0 -and ($ep % $PrintEvery) -eq 0) {
            # @() forces an array. Without it a single selected item (-PrintEvery 1)
            # is unrolled to the hashtable itself, and .Count would then be the
            # number of keys in that hashtable rather than the number of episodes.
            $lastN  = @($results | Select-Object -Last $PrintEvery)
            $avgSum = 0.0
            foreach ($r2 in $lastN) { $avgSum += $r2.Reward }
            $avg = [Math]::Round($avgSum / $lastN.Count, 2)

            # Action counts shown here are those of the most recent episode.
            Write-Host (" Ep {0,4}/{1} AvgReward: {2,7} Eps: {3:F3} S:{4} M:{5} L:{6} H:{7} StockOut:{8}" -f `
                $ep, $Episodes, $avg, $agent.Epsilon, $smallCount, $medCount, $largeCount, $holdCount, $scEnv.StockOuts) -ForegroundColor White
        }
    }

    # Pure exploitation for evaluation
    # (the agent returned to the caller keeps this Epsilon = 0 setting).
    $agent.Epsilon = 0.0

    Write-Host ""
    Write-Host " Phase 3: Final evaluation (epsilon=0)..." -ForegroundColor Gray
    $trained = Invoke-VBAFBenchmark -Agent $agent -Environment $scEnv -Episodes 10 -Label "Trained DQN"

    $bAvg = [Math]::Round($baseline.Avg, 2)
    $tAvg = [Math]::Round($trained.Avg, 2)

    # Relative improvement in percent; reported as 0 when the baseline average is 0.
    $improvement = if ($bAvg -ne 0) {
        [Math]::Round((($tAvg - $bAvg) / [Math]::Abs($bAvg)) * 100, 1)
    } else { 0 }

    Write-Host ""
    Write-Host "╔══════════════════════════════════════════════╗" -ForegroundColor Cyan
    Write-Host "║ Pillar 7: Multi-Agent Strategy - Results ║" -ForegroundColor Cyan
    Write-Host "╠══════════════════════════════════════════════╣" -ForegroundColor Cyan
    Write-Host ("║ Baseline (random) avg reward : {0,8} ║" -f $bAvg) -ForegroundColor Gray
    Write-Host ("║ Trained (DQN) avg reward : {0,8} ║" -f $tAvg) -ForegroundColor Green
    Write-Host ("║ Improvement : {0,7}% ║" -f $improvement) -ForegroundColor Yellow
    Write-Host "╠══════════════════════════════════════════════╣" -ForegroundColor Cyan
    Write-Host "║ Agent learned to: ║" -ForegroundColor Cyan
    Write-Host "║ Order when inventory low + demand high ║" -ForegroundColor White
    Write-Host "║ Hold when inventory high + demand low ║" -ForegroundColor White
    Write-Host "║ Match order size to market conditions ║" -ForegroundColor White
    Write-Host "╚══════════════════════════════════════════════╝" -ForegroundColor Cyan
    Write-Host ""

    return @{ Agent = $agent; Results = $results; Baseline = $baseline; Trained = $trained }
}

# ============================================================
# TEST SUGGESTIONS
# ============================================================
# 1. Run VBAF.LoadAll.ps1
# 2. QUICK DEMO
#      $r = Invoke-VBAFSupplyChainTraining -Episodes 100 -PrintEvery 20
# 3. FAST MODE
#      $r = Invoke-VBAFSupplyChainTraining -Episodes 50 -PrintEvery 10 -FastMode
# 4. INSPECT DECISIONS
#      $env = New-EnterpriseEnvironment -Name "SupplyChain"
#      $state = $env.Reset()
#      Write-Host "Inventory: $($env.Inventory) Demand: $($env.Demand) Price: $($env.Price)"
#      $action = $r.Agent.Act($state)
#      $labels = @("OrderSmall","OrderMedium","OrderLarge","Hold")
#      Write-Host "Agent decision: $($labels[$action])"
# ============================================================

# Load banner printed when the script is dot-sourced or run.
Write-Host "📦 VBAF.Enterprise.SupplyChain.ps1 loaded [v3.0.0 🏢]" -ForegroundColor Green
Write-Host " Pillar 7 : Multi-Agent Strategy Modeling" -ForegroundColor Cyan
Write-Host " Function : Invoke-VBAFSupplyChainTraining" -ForegroundColor Cyan
Write-Host ""
Write-Host " Quick start:" -ForegroundColor Yellow
Write-Host ' $r = Invoke-VBAFSupplyChainTraining -Episodes 100 -PrintEvery 20' -ForegroundColor White
Write-Host ""