# VBAF.ML.RNN.ps1
#Requires -Version 5.1
<#
.SYNOPSIS
    Recurrent Neural Networks - Sequence Learning Architectures
.DESCRIPTION
    Implements recurrent architectures from scratch.
    Designed as a TEACHING resource - every gate explained.

    Architectures included:
      - BasicRNN         : simple recurrent cell, vanishing gradient problem
      - LSTM             : Long Short-Term Memory, forget/input/output gates
      - GRU              : Gated Recurrent Unit, simpler than LSTM
      - BidirectionalRNN : processes sequence forward AND backward
      - Seq2Seq          : encoder-decoder for sequence translation
      - Attention        : learn WHICH part of input to focus on

    Utilities:
      - Sequence datasets : sine wave, text, number sequences
      - Gradient clipping : prevent exploding gradients
      - Teacher forcing   : seq2seq training trick
.NOTES
    Part of VBAF - Phase 6 Deep Learning Module
    PS 5.1 compatible - pure PowerShell, no dependencies
    Teaching project - every gate equation shown step by step!
#>

# Directory this script was loaded from.
# NOTE(review): not referenced again in this file - presumably consumed by
# other VBAF scripts that are dot-sourced alongside it; confirm before removing.
$basePath = $PSScriptRoot

# ============================================================
# TEACHING NOTE: Why Recurrent Networks?
# Standard networks treat each input INDEPENDENTLY.
# But sequences have CONTEXT - what came before matters!
#
# "The cat sat on the ___" -> "mat" (context from earlier words)
# Stock price tomorrow depends on prices OVER TIME
# Music note depends on what was played BEFORE
#
# RNNs maintain a HIDDEN STATE - a memory of past inputs.
# At each step: h_t = f(x_t, h_{t-1})
# The hidden state carries information forward through time.
#
# Problem: Basic RNNs suffer from VANISHING GRADIENTS.
# Information from many steps ago fades away.
# LSTM and GRU solve this with GATES that control memory flow.
# ============================================================

# ============================================================
# ACTIVATION FUNCTIONS
# ============================================================

function Invoke-RNNSigmoid {
    <#
    .SYNOPSIS
        Element-wise logistic sigmoid: 1 / (1 + e^-x).
    .PARAMETER x
        Input vector.
    .OUTPUTS
        One double per input element, each in (0, 1).
    #>
    param([double[]]$x)
    $r = @(0.0) * $x.Length
    for ($i = 0; $i -lt $x.Length; $i++) {
        # Clamp to +/-500 so Exp() can never overflow to infinity.
        $clamped = [Math]::Max(-500.0, [Math]::Min(500.0, $x[$i]))
        $r[$i] = 1.0 / (1.0 + [Math]::Exp(-$clamped))
    }
    return $r
}

function Invoke-RNNTanh {
    <#
    .SYNOPSIS
        Element-wise hyperbolic tangent.
    .DESCRIPTION
        Uses [Math]::Tanh instead of (e^2x - 1) / (e^2x + 1): the library call
        saturates cleanly for large |x| and avoids the cancellation error the
        hand-written quotient suffers for inputs close to zero.
    .PARAMETER x
        Input vector.
    .OUTPUTS
        One double per input element, each in [-1, 1].
    #>
    param([double[]]$x)
    $r = @(0.0) * $x.Length
    for ($i = 0; $i -lt $x.Length; $i++) {
        $r[$i] = [Math]::Tanh($x[$i])
    }
    return $r
}

function Invoke-RNNSoftmax {
    <#
    .SYNOPSIS
        Numerically stable softmax over a vector.
    .PARAMETER x
        Input scores.
    .OUTPUTS
        Probabilities that sum to 1; nothing for an empty input.
    #>
    param([double[]]$x)
    if ($x.Length -eq 0) { return @() }

    # Subtract the maximum before exponentiating so Exp() never overflows.
    $maxV = $x[0]
    for ($i = 1; $i -lt $x.Length; $i++) {
        if ($x[$i] -gt $maxV) { $maxV = $x[$i] }
    }

    $exps = @(0.0) * $x.Length
    $sumE = 0.0
    for ($i = 0; $i -lt $x.Length; $i++) {
        $exps[$i] = [Math]::Exp($x[$i] - $maxV)
        $sumE += $exps[$i]
    }
    for ($i = 0; $i -lt $x.Length; $i++) {
        $exps[$i] = $exps[$i] / $sumE
    }
    return $exps
}

# Vector operations
function Add-Vectors {
    <#
    .SYNOPSIS
        Element-wise sum a + b.
    .DESCRIPTION
        The result has the length of $a. Throws if $b is shorter than $a,
        because a missing element would otherwise be read as 0 silently.
    #>
    param([double[]]$a, [double[]]$b)
    if ($b.Length -lt $a.Length) {
        throw "Add-Vectors: second vector has $($b.Length) element(s), need at least $($a.Length)."
    }
    $r = @(0.0) * $a.Length
    for ($i = 0; $i -lt $a.Length; $i++) { $r[$i] = $a[$i] + $b[$i] }
    return $r
}

function Mul-Vectors {
    <#
    .SYNOPSIS
        Element-wise (Hadamard) product a * b.
    .DESCRIPTION
        The result has the length of $a. Throws if $b is shorter than $a,
        because a missing element would otherwise multiply as 0 silently.
    #>
    param([double[]]$a, [double[]]$b)
    if ($b.Length -lt $a.Length) {
        throw "Mul-Vectors: second vector has $($b.Length) element(s), need at least $($a.Length)."
    }
    $r = @(0.0) * $a.Length
    for ($i = 0; $i -lt $a.Length; $i++) { $r[$i] = $a[$i] * $b[$i] }
    return $r
}

# Matrix-vector multiply: W (rows x cols) * x (cols) -> (rows)
function MatVec {
    <#
    .SYNOPSIS
        Multiplies a row-major flat matrix by a vector.
    .PARAMETER W
        Flat matrix; element (i, j) lives at index i * cols + j.
    .PARAMETER x
        Vector with at least $cols elements.
    .PARAMETER rows
        Number of matrix rows (= length of the result).
    .PARAMETER cols
        Number of matrix columns.
    .DESCRIPTION
        Throws when W or x is too short for the stated shape instead of
        treating the missing entries as zeros.
    #>
    param([double[]]$W, [double[]]$x, [int]$rows, [int]$cols)
    if ($W.Length -lt $rows * $cols) {
        throw "MatVec: matrix has $($W.Length) element(s), need $($rows * $cols) for ${rows}x${cols}."
    }
    if ($x.Length -lt $cols) {
        throw "MatVec: vector has $($x.Length) element(s), need $cols."
    }
    $r = @(0.0) * $rows
    for ($i = 0; $i -lt $rows; $i++) {
        $sum = 0.0
        $rowStart = $i * $cols
        for ($j = 0; $j -lt $cols; $j++) {
            $sum += $W[$rowStart + $j] * $x[$j]
        }
        $r[$i] = $sum
    }
    return $r
}

# Random weight matrix initialization (Xavier)
function New-RNNWeights {
    <#
    .SYNOPSIS
        Creates a seeded, reproducible flat weight matrix of rows * cols values.
    .DESCRIPTION
        Each value is drawn uniformly from [-s, s) with s = sqrt(2 / (rows + cols)).
        NOTE(review): the textbook Xavier/Glorot *uniform* bound is
        sqrt(6 / (fan_in + fan_out)); the smaller bound is kept here so that
        existing seeded outputs do not change.
    .PARAMETER seed
        Seed for System.Random; the same seed always yields the same matrix.
    #>
    param([int]$rows, [int]$cols, [int]$seed = 42)
    $rng = [System.Random]::new($seed)
    $scale = [Math]::Sqrt(2.0 / ($rows + $cols))
    $W = @(0.0) * ($rows * $cols)
    for ($i = 0; $i -lt $W.Length; $i++) {
        $W[$i] = ($rng.NextDouble() * 2 - 1) * $scale
    }
    return $W
}

# Gradient clipping - prevent exploding gradients
function Invoke-GradientClip {
    <#
    .SYNOPSIS
        Rescales a gradient vector so its L2 norm does not exceed a threshold.
    .PARAMETER grads
        Gradient values.
    .PARAMETER threshold
        Maximum allowed L2 norm (default 1.0).
    .OUTPUTS
        The input unchanged when its norm is within the threshold, otherwise a
        scaled copy whose norm equals the threshold.
    #>
    param([double[]]$grads, [double]$threshold = 1.0)
    $norm = 0.0
    foreach ($g in $grads) { $norm += $g * $g }
    $norm = [Math]::Sqrt($norm)
    if ($norm -gt $threshold) {
        $scale = $threshold / $norm
        $clipped = @(0.0) * $grads.Length
        for ($i = 0; $i -lt $grads.Length; $i++) { $clipped[$i] = $grads[$i] * $scale }
        return $clipped
    }
    return $grads
}

# ============================================================
# BASIC RNN CELL
# ============================================================
# TEACHING NOTE: The simplest recurrent cell.
# At each timestep t:
#   h_t = tanh(W_xh * x_t + W_hh * h_{t-1} + b_h)
#   y_t = W_hy * h_t + b_y
#
# W_xh : input -> hidden weights
# W_hh : hidden -> hidden weights (the recurrent connection!)
# W_hy : hidden -> output weights
#
# PROBLEM: tanh gradient < 1, so after many timesteps
# gradients shrink to zero = VANISHING GRADIENT.
# The network forgets events from many steps ago!
# ============================================================
class BasicRNNCell {
    [int] $InputSize
    [int] $HiddenSize
    [double[]] $Wxh   # input->hidden, HiddenSize x InputSize (row-major)
    [double[]] $Whh   # hidden->hidden, HiddenSize x HiddenSize (row-major)
    [double[]] $Bh    # hidden bias
    [double[]] $H     # current hidden state
    [System.Collections.ArrayList] $HHistory   # copy of H after every Step

    # Builds a cell with seeded (reproducible) weights and a zero hidden state.
    BasicRNNCell([int]$inputSize, [int]$hiddenSize) {
        $this.InputSize = $inputSize
        $this.HiddenSize = $hiddenSize
        $this.Wxh = New-RNNWeights -rows $hiddenSize -cols $inputSize -seed 42
        $this.Whh = New-RNNWeights -rows $hiddenSize -cols $hiddenSize -seed 43
        $this.Bh = @(0.0) * $hiddenSize
        $this.H = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
    }

    # Zeroes the hidden state and forgets the recorded history.
    [void] Reset() {
        $this.H = @(0.0) * $this.HiddenSize
        $this.HHistory.Clear()
    }

    # One step forward: h_t = tanh(Wxh*x + Whh*h_{t-1} + Bh).
    # Returns the new hidden state and appends a copy to HHistory.
    # MatVec throws if $x has fewer than InputSize elements.
    [double[]] Step([double[]]$x) {
        $xh = MatVec $this.Wxh $x $this.HiddenSize $this.InputSize
        $hh = MatVec $this.Whh $this.H $this.HiddenSize $this.HiddenSize
        $preact = Add-Vectors (Add-Vectors $xh $hh) $this.Bh
        $this.H = Invoke-RNNTanh $preact
        $this.HHistory.Add($this.H.Clone()) | Out-Null
        return $this.H
    }

    # Process full sequence (after a Reset), return the hidden state of every step.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        # A list avoids re-allocating the result array on every timestep.
        $outputs = [System.Collections.Generic.List[double[]]]::new()
        foreach ($x in $sequence) {
            $outputs.Add($this.Step($x))
        }
        return $outputs.ToArray()
    }

    # Prints a boxed summary (sizes, parameter count, equation) to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Basic RNN Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize) -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        # Wxh + Whh + Bh
        $params = $this.HiddenSize * $this.InputSize + $this.HiddenSize * $this.HiddenSize + $this.HiddenSize
        Write-Host ("║ Parameters : {0,-22}║" -f $params) -ForegroundColor Yellow
        Write-Host ("║ Equation : h=tanh(Wx+Uh+b){0,-8}║" -f "") -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# LSTM CELL
# ============================================================
# TEACHING NOTE: LSTM solves the vanishing gradient problem!
# Key idea: a CELL STATE (c_t) acts as a "conveyor belt"
# carrying information through time with minimal modification.
#
# Three GATES control information flow:
#
# FORGET GATE: f_t = sigmoid(W_f * [h_{t-1}, x_t] + b_f)
#   "How much of the old cell state do we keep?"
#   f_t=0 : forget everything, f_t=1 : keep everything
#
# INPUT GATE:  i_t = sigmoid(W_i * [h_{t-1}, x_t] + b_i)
#              g_t = tanh(W_g * [h_{t-1}, x_t] + b_g)
#   "What new information do we store in the cell state?"
#
# OUTPUT GATE: o_t = sigmoid(W_o * [h_{t-1}, x_t] + b_o)
#   "What do we output based on the cell state?"
#
# CELL UPDATE:  c_t = f_t * c_{t-1} + i_t * g_t
# HIDDEN STATE: h_t = o_t * tanh(c_t)
#
# The cell state highway lets gradients flow without vanishing!
# ============================================================
class LSTMCell {
    [int] $InputSize
    [int] $HiddenSize
    # Gate weights [hidden+input] -> hidden, each HiddenSize x (HiddenSize+InputSize), row-major
    [double[]] $Wf   # forget gate
    [double[]] $Wi   # input gate
    [double[]] $Wg   # cell gate (candidate)
    [double[]] $Wo   # output gate
    [double[]] $Bf   # forget bias
    [double[]] $Bi   # input bias
    [double[]] $Bg   # cell bias
    [double[]] $Bo   # output bias
    [double[]] $H    # hidden state
    [double[]] $C    # cell state
    [System.Collections.ArrayList] $HHistory   # copy of H after every Step
    [System.Collections.ArrayList] $CHistory   # copy of C after every Step

    # Builds a cell with seeded (reproducible) weights and zero H / C state.
    LSTMCell([int]$inputSize, [int]$hiddenSize) {
        $this.InputSize = $inputSize
        $this.HiddenSize = $hiddenSize
        $combined = $inputSize + $hiddenSize
        # Each gate: (hidden+input) -> hidden
        $this.Wf = New-RNNWeights -rows $hiddenSize -cols $combined -seed 10
        $this.Wi = New-RNNWeights -rows $hiddenSize -cols $combined -seed 11
        $this.Wg = New-RNNWeights -rows $hiddenSize -cols $combined -seed 12
        $this.Wo = New-RNNWeights -rows $hiddenSize -cols $combined -seed 13
        $this.Bf = @(1.0) * $hiddenSize   # forget bias=1 helps remember by default
        $this.Bi = @(0.0) * $hiddenSize
        $this.Bg = @(0.0) * $hiddenSize
        $this.Bo = @(0.0) * $hiddenSize
        $this.H = @(0.0) * $hiddenSize
        $this.C = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
        $this.CHistory = [System.Collections.ArrayList]::new()
    }

    # Zeroes hidden and cell state and forgets both histories.
    [void] Reset() {
        $this.H = @(0.0) * $this.HiddenSize
        $this.C = @(0.0) * $this.HiddenSize
        $this.HHistory.Clear()
        $this.CHistory.Clear()
    }

    # One LSTM step. Updates C and H, records copies in the histories and
    # returns the new hidden state.
    # Throws if $x has fewer than InputSize elements (it used to be padded
    # with zeros silently, which hid shape mistakes).
    [double[]] Step([double[]]$x) {
        if ($x.Length -lt $this.InputSize) {
            throw "LSTMCell.Step: input has $($x.Length) element(s), expected $($this.InputSize)."
        }

        # Concatenate [h_{t-1}, x_t]
        $combined = $this.HiddenSize + $this.InputSize
        $hx = @(0.0) * $combined
        for ($k = 0; $k -lt $this.HiddenSize; $k++) { $hx[$k] = $this.H[$k] }
        for ($k = 0; $k -lt $this.InputSize; $k++) { $hx[$this.HiddenSize + $k] = $x[$k] }

        # Gate pre-activations: W * [h, x] + b
        $fRaw = Add-Vectors (MatVec $this.Wf $hx $this.HiddenSize $combined) $this.Bf
        $iRaw = Add-Vectors (MatVec $this.Wi $hx $this.HiddenSize $combined) $this.Bi
        $gRaw = Add-Vectors (MatVec $this.Wg $hx $this.HiddenSize $combined) $this.Bg
        $oRaw = Add-Vectors (MatVec $this.Wo $hx $this.HiddenSize $combined) $this.Bo

        $fGate = Invoke-RNNSigmoid $fRaw   # forget gate
        $iGate = Invoke-RNNSigmoid $iRaw   # input gate
        $gCand = Invoke-RNNTanh $gRaw      # candidate cell
        $oGate = Invoke-RNNSigmoid $oRaw   # output gate

        # Cell state update: c_t = f * c_{t-1} + i * g
        $this.C = Add-Vectors (Mul-Vectors $fGate $this.C) (Mul-Vectors $iGate $gCand)
        # Hidden state: h_t = o * tanh(c_t)
        $this.H = Mul-Vectors $oGate (Invoke-RNNTanh $this.C)

        $this.HHistory.Add($this.H.Clone()) | Out-Null
        $this.CHistory.Add($this.C.Clone()) | Out-Null
        return $this.H
    }

    # Process full sequence (after a Reset), return the hidden state of every step.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        # A list avoids re-allocating the result array on every timestep.
        $outputs = [System.Collections.Generic.List[double[]]]::new()
        foreach ($x in $sequence) {
            $outputs.Add($this.Step($x))
        }
        return $outputs.ToArray()
    }

    # Prints a boxed summary (sizes, parameter count, gates) to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ LSTM Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize) -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        $combined = $this.InputSize + $this.HiddenSize
        # Four gates, each with a weight matrix and a bias vector.
        $params = 4 * ($this.HiddenSize * $combined + $this.HiddenSize)
        Write-Host ("║ Parameters : {0,-22}║" -f $params) -ForegroundColor Yellow
        Write-Host ("║ Gates : forget,input,cell,out{0,-1}║" -f "") -ForegroundColor DarkGray
        Write-Host ("║ Cell state : long-term memory{0,-6}║" -f "") -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }

    # Prints the first (up to) 8 values of one recorded state vector on a
    # single line, each as sign + absolute value with two decimals.
    hidden [void] PrintStateRow([string]$label, [double[]]$state, [string]$color) {
        Write-Host $label -ForegroundColor Cyan -NoNewline
        $shown = [Math]::Min(8, $state.Length)
        for ($k = 0; $k -lt $shown; $k++) {
            $sign = if ($state[$k] -gt 0) { "+" } else { "-" }
            Write-Host (" {0}{1:F2}" -f $sign, [Math]::Abs($state[$k])) -ForegroundColor $color -NoNewline
        }
        Write-Host ""
    }

    # Shows the recorded hidden and cell state after timestep $step (0-based,
    # counted since the last Reset). Individual gate activations are not
    # stored, so only h and c are printed.
    [void] PrintGateActivity([int]$step) {
        # Negative indexes are rejected too; ArrayList would throw on them.
        if ($step -lt 0 -or $step -ge $this.HHistory.Count) {
            Write-Host "Step out of range" -ForegroundColor Red
            return
        }
        Write-Host ""
        Write-Host ("🔦 LSTM Gate Activity at step {0}:" -f $step) -ForegroundColor Green
        $this.PrintStateRow(" Hidden state (h):", $this.HHistory[$step], "White")
        $this.PrintStateRow(" Cell state (c):", $this.CHistory[$step], "Yellow")
        Write-Host ""
    }
}

# ============================================================
# GRU CELL
# ============================================================
# TEACHING NOTE: GRU = Gated Recurrent Unit (2014)
# Simpler than LSTM - only 2 gates, no separate cell state.
# Often performs as well as LSTM with fewer parameters!
#
# RESET GATE: r_t = sigmoid(W_r * [h_{t-1}, x_t])
#   "How much of past hidden state do we use?"
#   r_t=0 : ignore past completely (start fresh)
#
# UPDATE GATE: z_t = sigmoid(W_z * [h_{t-1}, x_t])
#   "How much do we update the hidden state?"
#   z_t=0 : keep old state, z_t=1 : use new candidate
#
# CANDIDATE: h~_t = tanh(W * [r_t * h_{t-1}, x_t])
# HIDDEN:    h_t = (1-z_t) * h_{t-1} + z_t * h~_t
#
# GRU vs LSTM:
#   LSTM: 4 weight matrices, separate cell state
#   GRU : 3 weight matrices, single hidden state
# Rule of thumb: try GRU first, use LSTM if more memory needed
# ============================================================
class GRUCell {
    [int] $InputSize
    [int] $HiddenSize
    [double[]] $Wr   # reset-gate weights
    [double[]] $Wz   # update-gate weights
    [double[]] $Wh   # candidate-state weights
    [double[]] $Br   # reset-gate bias
    [double[]] $Bz   # update-gate bias
    [double[]] $Bh   # candidate-state bias
    [double[]] $H    # current hidden state
    [System.Collections.ArrayList] $HHistory   # copy of H after every Step

    # Creates the three seeded weight matrices plus zero biases and zero state.
    GRUCell([int]$inputSize, [int]$hiddenSize) {
        $width = $inputSize + $hiddenSize

        $this.InputSize = $inputSize
        $this.HiddenSize = $hiddenSize

        $this.Wr = New-RNNWeights -rows $hiddenSize -cols $width -seed 20
        $this.Wz = New-RNNWeights -rows $hiddenSize -cols $width -seed 21
        $this.Wh = New-RNNWeights -rows $hiddenSize -cols $width -seed 22

        $this.Br = @(0.0) * $hiddenSize
        $this.Bz = @(0.0) * $hiddenSize
        $this.Bh = @(0.0) * $hiddenSize

        $this.H = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
    }

    # Back to a zero hidden state with an empty history.
    [void] Reset() {
        $this.HHistory.Clear()
        $this.H = @(0.0) * $this.HiddenSize
    }

    # Advances the cell by one timestep and returns the new hidden state.
    [double[]] Step([double[]]$x) {
        $nH = $this.HiddenSize
        $nX = $this.InputSize
        $width = $nH + $nX

        # $joined = [h, x] feeds both gates; $gated = [r*h, x] feeds the candidate.
        # The x half is the same in both, so it is written in one pass.
        $joined = @(0.0) * $width
        $gated = @(0.0) * $width
        $k = 0
        while ($k -lt $nH) {
            $joined[$k] = $this.H[$k]
            $k++
        }
        $k = 0
        while ($k -lt $nX) {
            $joined[$nH + $k] = $x[$k]
            $gated[$nH + $k] = $x[$k]
            $k++
        }

        # r = sigmoid(Wr*[h,x] + Br), z = sigmoid(Wz*[h,x] + Bz)
        $resetPre = Add-Vectors (MatVec $this.Wr $joined $nH $width) $this.Br
        $resetGate = Invoke-RNNSigmoid $resetPre
        $updatePre = Add-Vectors (MatVec $this.Wz $joined $nH $width) $this.Bz
        $updateGate = Invoke-RNNSigmoid $updatePre

        # Hidden half of the candidate input is the reset-scaled previous state.
        $scaledPrev = Mul-Vectors $resetGate $this.H
        $k = 0
        while ($k -lt $nH) {
            $gated[$k] = $scaledPrev[$k]
            $k++
        }
        $candPre = Add-Vectors (MatVec $this.Wh $gated $nH $width) $this.Bh
        $candidate = Invoke-RNNTanh $candPre

        # h_t = (1 - z) * h_{t-1} + z * candidate
        $blended = @(0.0) * $nH
        $k = 0
        while ($k -lt $nH) {
            $blended[$k] = (1 - $updateGate[$k]) * $this.H[$k] + $updateGate[$k] * $candidate[$k]
            $k++
        }

        $this.H = $blended
        $this.HHistory.Add($this.H.Clone()) | Out-Null
        return $this.H
    }

    # Resets the cell, runs every element of the sequence through Step and
    # returns the hidden state produced at each timestep.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        $collected = @()
        foreach ($frame in $sequence) {
            $hidden = $this.Step($frame)
            $collected += ,$hidden
        }
        return $collected
    }

    # Writes the boxed cell description to the host.
    [void] PrintSummary() {
        $width = $this.InputSize + $this.HiddenSize
        # Three weight matrices, each with its bias vector.
        $params = 3 * ($this.HiddenSize * $width + $this.HiddenSize)

        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ GRU Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize) -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        Write-Host ("║ Parameters : {0,-22}║" -f $params) -ForegroundColor Yellow
        Write-Host ("║ Gates : reset, update{0,-9}║" -f "") -ForegroundColor DarkGray
        Write-Host ("║ vs LSTM : 25% fewer params{0,-5}║" -f "") -ForegroundColor Green
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# BIDIRECTIONAL RNN WRAPPER
# ============================================================
# TEACHING NOTE: Standard RNN only sees the PAST.
# Bidirectional processes the sequence BOTH ways:
#   Forward pass:  x_1 -> x_2 -> x_3 ... x_T
#   Backward pass: x_T -> x_{T-1} ... x_1
# Then concatenates both hidden states at each timestep.
#
# Why? Some tasks need future context too!
# "The bank was steep" vs "The bank was closed"
# The word "bank" meaning depends on what comes AFTER it!
# ============================================================
class BidirectionalRNN {
    [object] $ForwardCell    # reads the sequence first -> last
    [object] $BackwardCell   # reads the sequence last -> first
    [string] $CellType       # "RNN", "LSTM", "GRU"
    [int] $InputSize
    [int] $HiddenSize

    # Creates one cell per direction. "LSTM" and "GRU" select those cells;
    # any other value falls back to BasicRNNCell.
    BidirectionalRNN([string]$cellType, [int]$inputSize, [int]$hiddenSize) {
        $this.CellType = $cellType
        $this.InputSize = $inputSize
        $this.HiddenSize = $hiddenSize
        switch ($cellType) {
            "LSTM" {
                $this.ForwardCell = [LSTMCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [LSTMCell]::new($inputSize, $hiddenSize)
            }
            "GRU" {
                $this.ForwardCell = [GRUCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [GRUCell]::new($inputSize, $hiddenSize)
            }
            default {
                $this.ForwardCell = [BasicRNNCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [BasicRNNCell]::new($inputSize, $hiddenSize)
            }
        }
        $this.ReseedBackwardCell()
    }

    # The cell constructors use fixed seeds, so two freshly built cells carry
    # exactly the same weights. The two directions of a bidirectional network
    # are meant to be independent parameter sets, so the backward cell gets
    # its own (still deterministic) seeds: the cell's seed + 100.
    hidden [void] ReseedBackwardCell() {
        $h = $this.HiddenSize
        $combined = $this.InputSize + $h
        $cell = $this.BackwardCell
        if ($cell -is [LSTMCell]) {
            $cell.Wf = New-RNNWeights -rows $h -cols $combined -seed 110
            $cell.Wi = New-RNNWeights -rows $h -cols $combined -seed 111
            $cell.Wg = New-RNNWeights -rows $h -cols $combined -seed 112
            $cell.Wo = New-RNNWeights -rows $h -cols $combined -seed 113
        }
        elseif ($cell -is [GRUCell]) {
            $cell.Wr = New-RNNWeights -rows $h -cols $combined -seed 120
            $cell.Wz = New-RNNWeights -rows $h -cols $combined -seed 121
            $cell.Wh = New-RNNWeights -rows $h -cols $combined -seed 122
        }
        else {
            $cell.Wxh = New-RNNWeights -rows $h -cols $this.InputSize -seed 142
            $cell.Whh = New-RNNWeights -rows $h -cols $h -seed 143
        }
    }

    # Returns concatenated [forward, backward] at each timestep.
    # Element t holds the forward state after x_0..x_t followed by the
    # backward state after x_{T-1}..x_t. An empty sequence yields an empty result.
    [double[][]] Forward([double[][]]$sequence) {
        $n = $sequence.Length
        $combined = [double[][]]::new($n)
        if ($n -eq 0) { return $combined }

        # Forward pass
        $fwdOutputs = $this.ForwardCell.Forward($sequence)

        # Backward pass (reverse sequence)
        $revSeq = [double[][]]::new($n)
        for ($i = 0; $i -lt $n; $i++) { $revSeq[$i] = $sequence[$n - 1 - $i] }
        $bwdOutputsRev = $this.BackwardCell.Forward($revSeq)

        # Concatenate at each position; the backward output for position i is
        # the one produced at reversed index n-1-i.
        for ($i = 0; $i -lt $n; $i++) {
            $fwd = $fwdOutputs[$i]
            $bwd = $bwdOutputsRev[$n - 1 - $i]
            $cat = [double[]]::new($fwd.Length + $bwd.Length)
            for ($j = 0; $j -lt $fwd.Length; $j++) { $cat[$j] = $fwd[$j] }
            for ($j = 0; $j -lt $bwd.Length; $j++) { $cat[$fwd.Length + $j] = $bwd[$j] }
            $combined[$i] = $cat
        }
        return $combined
    }

    # Prints a boxed summary (cell type, sizes) to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Bidirectional RNN ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Cell type : {0,-22}║" -f $this.CellType) -ForegroundColor White
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize) -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        Write-Host ("║ Output size : {0,-22}║" -f ($this.HiddenSize * 2)) -ForegroundColor Yellow
        Write-Host ("║ Direction : forward + backward{0,-3}║" -f "") -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# ATTENTION MECHANISM
# ============================================================
# TEACHING NOTE: Attention = "Where should I look?"
# Instead of compressing the WHOLE sequence into one vector,
# attention lets the model focus on RELEVANT parts.
#
# For each output step, compute a weight for each input step:
#   score(q, k) = q · k   (dot product attention)
#   weights = softmax(scores)
#   context = sum(weights * values)
#
# Q = Query  : what we're looking for
# K = Keys   : what each encoder step "offers"
# V = Values : what we actually read when we attend
#
# This is the foundation of TRANSFORMERS!
# "Attention is All You Need" (2017) revolutionized NLP.
# ============================================================
class DotProductAttention {
    [double[]] $AttentionWeights   # last computed weights (one per key); null before the first Forward

    DotProductAttention() {}

    # Scaled dot-product attention for a single query.
    #   query: (d,)   keys: (T, d)   values: (T, dv)
    # Returns the context vector (dv,) and stores the softmax weights in
    # AttentionWeights. Throws on an empty query, on empty keys, or when
    # there are fewer values than keys - these used to surface as NaN
    # weights or obscure null-index errors.
    [double[]] Forward([double[]]$query, [double[][]]$keys, [double[][]]$values) {
        $T = $keys.Length
        $d = $query.Length
        if ($d -eq 0) { throw "DotProductAttention.Forward: query is empty." }
        if ($T -eq 0) { throw "DotProductAttention.Forward: keys are empty." }
        if ($values.Length -lt $T) {
            throw "DotProductAttention.Forward: got $($values.Length) value(s) for $T key(s)."
        }

        # Score = query · key_t for each t
        $scale = [Math]::Sqrt($d)   # scale by sqrt(d)
        $scores = @(0.0) * $T
        for ($t = 0; $t -lt $T; $t++) {
            $dot = 0.0
            for ($i = 0; $i -lt $d; $i++) {
                $dot += $query[$i] * $keys[$t][$i]
            }
            $scores[$t] = $dot / $scale
        }

        # Attention weights = softmax(scores)
        $this.AttentionWeights = Invoke-RNNSoftmax $scores

        # Context = weighted sum of values
        $dv = $values[0].Length
        $context = @(0.0) * $dv
        for ($t = 0; $t -lt $T; $t++) {
            for ($i = 0; $i -lt $dv; $i++) {
                $context[$i] += $this.AttentionWeights[$t] * $values[$t][$i]
            }
        }
        return $context
    }

    # Prints one line per stored weight: label, weight and a bar of up to 20
    # blocks. Labels come from $tokens; positions without a token show "t<index>".
    [void] PrintAttentionMap([string[]]$tokens) {
        Write-Host ""
        Write-Host "🔍 Attention Weights:" -ForegroundColor Green
        for ($t = 0; $t -lt $this.AttentionWeights.Length; $t++) {
            $w = $this.AttentionWeights[$t]
            $barN = [int]($w * 20)
            $bar = "█" * $barN
            $tok = if ($t -lt $tokens.Length) { $tokens[$t] } else { "t$t" }
            $color = if ($w -gt 0.3) { "Green" } elseif ($w -gt 0.1) { "Yellow" } else { "DarkGray" }
            Write-Host (" {0,-12} {1,6:F4} {2}" -f $tok, $w, $bar) -ForegroundColor $color
        }
        Write-Host ""
    }
}

# ============================================================
# SEQ2SEQ MODEL (Encoder-Decoder)
# ============================================================
# TEACHING NOTE: Seq2Seq translates one sequence to another.
# Examples: English -> French, Question -> Answer
#
# ENCODER: reads the input sequence, produces a context vector
#   "Summarise the input into a fixed-size memory"
#
# DECODER: generates the output sequence from context
#   "Expand the memory into the output sequence"
#
# TEACHER FORCING: during training, feed the CORRECT previous
# output as the next decoder input (not the predicted one).
# This speeds up training but can cause "exposure bias" at test.
# ============================================================
class Seq2SeqModel {
    [LSTMCell] $Encoder
    [LSTMCell] $Decoder
    [double[]] $Wy   # decoder hidden -> output, OutputSize x HiddenSize (row-major)
    [double[]] $By   # output bias
    [int] $OutputSize
    [DotProductAttention] $Attention   # created here; Encode/Decode below do not use it

    # Builds encoder (inputSize -> hiddenSize), decoder (outputSize -> hiddenSize)
    # and the seeded output projection.
    Seq2SeqModel([int]$inputSize, [int]$hiddenSize, [int]$outputSize) {
        $this.Encoder = [LSTMCell]::new($inputSize, $hiddenSize)
        $this.Decoder = [LSTMCell]::new($outputSize, $hiddenSize)
        $this.Wy = New-RNNWeights -rows $outputSize -cols $hiddenSize -seed 99
        $this.By = @(0.0) * $outputSize
        $this.OutputSize = $outputSize
        $this.Attention = [DotProductAttention]::new()
    }

    # Encode input sequence -> final hidden state.
    # Returns @{ H = ...; C = ... } holding copies of the encoder's final state.
    [hashtable] Encode([double[][]]$inputSeq) {
        $this.Encoder.Reset()
        foreach ($x in $inputSeq) {
            $this.Encoder.Step($x) | Out-Null
        }
        return @{ H = $this.Encoder.H.Clone(); C = $this.Encoder.C.Clone() }
    }

    # Decode: generate output sequence of given length.
    # Each element is a softmax distribution over OutputSize entries.
    [double[][]] Decode([hashtable]$context, [int]$outputLen) {
        # Start from a clean decoder so histories from earlier Decode calls do
        # not pile up, then load copies of the encoder state so the caller's
        # context arrays are never shared with the decoder.
        $this.Decoder.Reset()
        $this.Decoder.H = $context.H.Clone()
        $this.Decoder.C = $context.C.Clone()

        $outputs = [System.Collections.Generic.List[double[]]]::new()
        # Named $decoderInput because $input is a PowerShell automatic variable.
        [double[]]$decoderInput = @(0.0) * $this.OutputSize   # start token = zeros
        for ($t = 0; $t -lt $outputLen; $t++) {
            $decOut = $this.Decoder.Step($decoderInput)
            $out = Add-Vectors (MatVec $this.Wy $decOut $this.OutputSize $this.Decoder.HiddenSize) $this.By
            [double[]]$prob = Invoke-RNNSoftmax $out
            $outputs.Add($prob)
            $decoderInput = $prob   # feed output as next input (no teacher forcing at inference)
        }
        return $outputs.ToArray()
    }

    # Prints a boxed summary of encoder, decoder and output size to the host.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Seq2Seq Model ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Encoder : LSTM({0}->{1}){2,-12}║" -f $this.Encoder.InputSize, $this.Encoder.HiddenSize, "") -ForegroundColor White
        Write-Host ("║ Decoder : LSTM({0}->{1}){2,-12}║" -f $this.Decoder.InputSize, $this.Decoder.HiddenSize, "") -ForegroundColor White
        Write-Host ("║ Output size : {0,-22}║" -f $this.OutputSize) -ForegroundColor White
        Write-Host ("║ Attention : DotProduct{0,-12}║" -f "") -ForegroundColor Yellow
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# OUTPUT LAYER FOR SEQUENCE CLASSIFICATION/REGRESSION
# ============================================================
class RNNOutputLayer {
    [double[]] $W   # OutputSize x InputSize (row-major)
    [double[]] $B   # output bias
    [int] $InputSize
    [int] $OutputSize
    [string] $Activation   # "softmax", "sigmoid", "tanh"; anything else = linear

    RNNOutputLayer([int]$inputSize, [int]$outputSize, [string]$activation) {
        $this.InputSize = $inputSize
        $this.OutputSize = $outputSize
        $this.Activation = $activation
        $this.W = New-RNNWeights -rows $outputSize -cols $inputSize -seed 77
        $this.B = @(0.0) * $outputSize
    }

    # Computes activation(W * h + B). An unrecognised activation name returns
    # the raw linear output.
    [double[]] Forward([double[]]$h) {
        $raw = Add-Vectors (MatVec $this.W $h $this.OutputSize $this.InputSize) $this.B
        $out = switch ($this.Activation) {
            "softmax" { Invoke-RNNSoftmax $raw }
            "sigmoid" { Invoke-RNNSigmoid $raw }
            "tanh"    { Invoke-RNNTanh $raw }
            default   { $raw }
        }
        return $out
    }
}

# ============================================================
# BUILT-IN DATASETS
# ============================================================
function Get-VBAFSequenceDataset {
    <#
    .SYNOPSIS
        Returns one of the built-in toy sequence datasets as a hashtable.
    .PARAMETER Name
        "SineWave" (default), "BinaryAdd" or "SentimentWords".
    .OUTPUTS
        SineWave       : Sequences (200 x 10 x 1), Targets, SeqLen, InputSize, Task="regression"
        BinaryAdd      : Sequences (50 x 4 x 2),  Targets (50 x 5 x 1), SeqLen, InputSize, Task="seq2seq"
        SentimentWords : Sequences, Labels (1 = positive, 0 = negative), InputSize, Task="classification"
        $null (after a message on the host) for an unknown name.
    .NOTES
        The random generator is seeded with 42, so every call returns the same data.
    #>
    param([string]$Name = "SineWave")

    $rng = [System.Random]::new(42)

    switch ($Name) {
        "SineWave" {
            Write-Host "📊 Dataset: SineWave (predict next value)" -ForegroundColor Cyan
            Write-Host " Task: given 10 values, predict the 11th" -ForegroundColor Cyan
            $n = 200
            $seqLen = 10
            $step = 0.1

            # Noisy sine samples; one extra value so the last window has a target.
            $total = $n + $seqLen + 1
            $values = @(0.0) * $total
            for ($i = 0; $i -lt $total; $i++) {
                $values[$i] = [Math]::Sin($i * $step) + ($rng.NextDouble() - 0.5) * 0.1
            }

            # Sliding windows of $seqLen values; target is the value that follows.
            $sequences = @(); $targets = @()
            for ($i = 0; $i -lt $n; $i++) {
                $seq = @()
                for ($j = 0; $j -lt $seqLen; $j++) {
                    $seq += ,@($values[$i + $j])   # single feature
                }
                $sequences += ,$seq
                $targets += $values[$i + $seqLen]
            }
            return @{ Sequences=$sequences; Targets=$targets; SeqLen=$seqLen; InputSize=1; Task="regression" }
        }
        "BinaryAdd" {
            Write-Host "📊 Dataset: BinaryAdd (seq2seq)" -ForegroundColor Cyan
            Write-Host " Task: add two 4-bit binary numbers -> 5-bit result" -ForegroundColor Cyan
            $seqs = @(); $targets = @()
            for ($i = 0; $i -lt 50; $i++) {
                $a = $rng.Next(0, 16)
                $b = $rng.Next(0, 16)
                $cSum = $a + $b
                # Encode as bit sequences (LSB first).
                # Each input step carries the pair (bit of a, bit of b).
                $aSeq = @(); $cSeq = @()
                for ($bit = 0; $bit -lt 4; $bit++) {
                    $aSeq += ,@([double](($a -shr $bit) -band 1), [double](($b -shr $bit) -band 1))
                }
                for ($bit = 0; $bit -lt 5; $bit++) {
                    $cSeq += ,@([double](($cSum -shr $bit) -band 1))
                }
                $seqs += ,$aSeq
                $targets += ,$cSeq
            }
            return @{ Sequences=$seqs; Targets=$targets; SeqLen=4; InputSize=2; Task="seq2seq" }
        }
        "SentimentWords" {
            Write-Host "📊 Dataset: SentimentWords (sequence classification)" -ForegroundColor Cyan
            Write-Host " Task: classify word sequence as positive/negative" -ForegroundColor Cyan
            # Simple word embeddings (2D for teaching)
            $vocab = @{
                "good"=@(0.8,0.2); "great"=@(0.9,0.1); "excellent"=@(1.0,0.0)
                "happy"=@(0.7,0.3); "love"=@(0.85,0.15); "wonderful"=@(0.95,0.05)
                "bad"=@(0.1,0.9); "terrible"=@(0.0,1.0); "awful"=@(0.05,0.95)
                "sad"=@(0.2,0.8); "hate"=@(0.1,0.9); "horrible"=@(0.0,0.95)
                "movie"=@(0.5,0.5); "film"=@(0.5,0.5); "was"=@(0.5,0.5)
            }
            $posSentences = @(
                @("the","movie","was","great"),
                @("wonderful","film"),
                @("great","excellent","good"),
                @("love","this","film"),
                @("happy","wonderful","movie")
            )
            $negSentences = @(
                @("the","movie","was","terrible"),
                @("horrible","film"),
                @("bad","awful","sad"),
                @("hate","this","film"),
                @("sad","terrible","movie")
            )

            $seqs = @(); $labels = @()
            # Positive sentences first (label 1), then negative ones (label 0).
            $groups = @(
                @{ Sentences = $posSentences; Label = 1 },
                @{ Sentences = $negSentences; Label = 0 }
            )
            foreach ($group in $groups) {
                foreach ($sent in $group.Sentences) {
                    $seq = @()
                    foreach ($w in $sent) {
                        # Words missing from the vocabulary get the neutral embedding.
                        $emb = if ($vocab.ContainsKey($w)) { $vocab[$w] } else { @(0.5, 0.5) }
                        $seq += ,@($emb[0], $emb[1])
                    }
                    $seqs += ,$seq
                    $labels += $group.Label
                }
            }
            return @{ Sequences=$seqs; Labels=$labels; InputSize=2; Task="classification" }
        }
        default {
            Write-Host "❌ Unknown: $Name" -ForegroundColor Red
            Write-Host " Available: SineWave, BinaryAdd, SentimentWords" -ForegroundColor Yellow
            return $null
        }
    }
}

# ============================================================
# ARCHITECTURE COMPARISON UTILITY
# ============================================================
function Compare-RNNArchitectures {
    <#
    .SYNOPSIS
        Runs one sequence through BasicRNN, LSTM, GRU and a bidirectional LSTM
        and prints a table of parameter counts and output sizes.
    .PARAMETER sequence
        Input sequence; every element is one timestep's feature vector.
    .PARAMETER HiddenSize
        Hidden size used for every architecture (default 8).
    .NOTES
        Writes to the host only; nothing is returned.
    #>
    param([double[][]]$sequence, [int]$HiddenSize = 8)

    # The input size is read from the first timestep, so there must be one.
    if ($null -eq $sequence -or $sequence.Length -eq 0) {
        Write-Host "❌ Compare-RNNArchitectures: sequence is empty" -ForegroundColor Red
        return
    }

    $n = $sequence.Length
    $inputSize = $sequence[0].Length
    Write-Host ""
    Write-Host "⚖️ RNN Architecture Comparison" -ForegroundColor Green
    Write-Host (" Sequence length : {0}" -f $n) -ForegroundColor Cyan
    Write-Host (" Input size : {0}" -f $inputSize) -ForegroundColor Cyan
    Write-Host ""

    $combined = $inputSize + $HiddenSize

    # Each forward pass is executed to show the architecture handles the
    # sequence; the hidden states themselves are not part of the table.

    # BasicRNN
    $rnn = [BasicRNNCell]::new($inputSize, $HiddenSize)
    $null = $rnn.Forward($sequence)
    $rnnParams = $HiddenSize * $inputSize + $HiddenSize * $HiddenSize + $HiddenSize

    # LSTM
    $lstm = [LSTMCell]::new($inputSize, $HiddenSize)
    $null = $lstm.Forward($sequence)
    $lstmParams = 4 * ($HiddenSize * $combined + $HiddenSize)

    # GRU
    $gru = [GRUCell]::new($inputSize, $HiddenSize)
    $null = $gru.Forward($sequence)
    $gruParams = 3 * ($HiddenSize * $combined + $HiddenSize)

    # Bidirectional LSTM
    $biLSTM = [BidirectionalRNN]::new("LSTM", $inputSize, $HiddenSize)
    $null = $biLSTM.Forward($sequence)

    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "Architecture", "Params", "Output Size", "Memory") -ForegroundColor Yellow
    Write-Host (" {0}" -f ("-" * 55)) -ForegroundColor DarkGray
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "BasicRNN", $rnnParams, $HiddenSize, "Short") -ForegroundColor White
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "LSTM", $lstmParams, $HiddenSize, "Long") -ForegroundColor Green
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "GRU", $gruParams, $HiddenSize, "Medium") -ForegroundColor Cyan
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "Bidirect-LSTM", ($lstmParams * 2), ($HiddenSize * 2), "Long+Context") -ForegroundColor Yellow
    Write-Host ""
    Write-Host " 💡 Rule of thumb:" -ForegroundColor DarkGray
    Write-Host " Short sequences -> BasicRNN or GRU" -ForegroundColor DarkGray
    Write-Host " Long sequences -> LSTM" -ForegroundColor DarkGray
    Write-Host " Need context -> Bidirectional" -ForegroundColor DarkGray
    Write-Host " Seq translation -> Seq2Seq + Attention" -ForegroundColor DarkGray
    Write-Host ""
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- BasicRNN forward pass ---
# 2. $rnn = [BasicRNNCell]::new(1, 8)
#    $rnn.PrintSummary()
#    $data = Get-VBAFSequenceDataset -Name "SineWave"
#    $out = $rnn.Forward($data.Sequences[0])
#    Write-Host "Processed $($data.Sequences[0].Length) steps, output size: $($out[0].Length)"
#
# --- LSTM forward pass ---
# 3. $lstm = [LSTMCell]::new(1, 8)
#    $lstm.PrintSummary()
#    $out2 = $lstm.Forward($data.Sequences[0])
#    $lstm.PrintGateActivity(5)   # show gates at step 5
#
# --- GRU forward pass ---
# 4. $gru = [GRUCell]::new(1, 8)
#    $gru.PrintSummary()
#    $out3 = $gru.Forward($data.Sequences[0])
#
# --- Bidirectional ---
# 5. $bi = [BidirectionalRNN]::new("LSTM", 1, 8)
#    $bi.PrintSummary()
#    $biOut = $bi.Forward($data.Sequences[0])
#    Write-Host "Bidirectional output size: $($biOut[0].Length) (should be 16 = 8*2)"
#
# --- Attention ---
# 6.
#    $attn = [DotProductAttention]::new()
#    $keys = $out2       # LSTM hidden states as keys
#    $query = $out2[-1]  # last hidden state as query
#    $context = $attn.Forward($query, $keys, $keys)
#    $attn.PrintAttentionMap(@("t0","t1","t2","t3","t4","t5","t6","t7","t8","t9"))
#
# --- Architecture comparison ---
# 7. Compare-RNNArchitectures -sequence $data.Sequences[0]
#
# --- Seq2Seq ---
# 8. $s2s = [Seq2SeqModel]::new(2, 8, 1)
#    $s2s.PrintSummary()
#    $binData = Get-VBAFSequenceDataset -Name "BinaryAdd"
#    $ctx = $s2s.Encode($binData.Sequences[0])
#    $decoded = $s2s.Decode($ctx, 5)
#    Write-Host "Encoded and decoded binary addition sequence"
#
# --- Sentiment classification ---
# 9. $sentData = Get-VBAFSequenceDataset -Name "SentimentWords"
#    $lstm2 = [LSTMCell]::new(2, 8)
#    $outLayer = [RNNOutputLayer]::new(8, 2, "softmax")
#    $correct = 0
#    for ($i = 0; $i -lt $sentData.Sequences.Length; $i++) {
#        $hiddens = $lstm2.Forward($sentData.Sequences[$i])
#        $probs = $outLayer.Forward($hiddens[-1])
#        $pred = if ($probs[0] -gt $probs[1]) { 0 } else { 1 }
#        if ($pred -eq $sentData.Labels[$i]) { $correct++ }
#    }
#    Write-Host "Sentiment accuracy (untrained): $correct / $($sentData.Sequences.Length)"
# ============================================================

# Load banner: printed once when the script is dot-sourced, listing what it
# defines plus a short quick-start snippet.
Write-Host "📦 VBAF.ML.RNN.ps1 loaded [Phase 6 🧠]" -ForegroundColor Green
Write-Host " Classes : BasicRNNCell" -ForegroundColor Cyan
Write-Host " LSTMCell" -ForegroundColor Cyan
Write-Host " GRUCell" -ForegroundColor Cyan
Write-Host " BidirectionalRNN" -ForegroundColor Cyan
Write-Host " DotProductAttention" -ForegroundColor Cyan
Write-Host " Seq2SeqModel" -ForegroundColor Cyan
Write-Host " RNNOutputLayer" -ForegroundColor Cyan
Write-Host " Functions : Compare-RNNArchitectures" -ForegroundColor Cyan
Write-Host " Get-VBAFSequenceDataset" -ForegroundColor Cyan
Write-Host " Invoke-GradientClip" -ForegroundColor Cyan
Write-Host ""
Write-Host " Quick start:" -ForegroundColor Yellow
# Single-quoted so the snippet is shown literally instead of being expanded.
Write-Host ' $lstm = [LSTMCell]::new(1, 8)' -ForegroundColor White
Write-Host ' $lstm.PrintSummary()' -ForegroundColor White
Write-Host ' $data = Get-VBAFSequenceDataset -Name "SineWave"' -ForegroundColor White
Write-Host ' $out = $lstm.Forward($data.Sequences[0])' -ForegroundColor White
Write-Host ' Write-Host "Steps: $($out.Length) Hidden: $($out[0].Length)"' -ForegroundColor White
Write-Host ""