# VBAF.ML.RNN.ps1

#Requires -Version 5.1
<#
.SYNOPSIS
    Recurrent Neural Networks - Sequence Learning Architectures
.DESCRIPTION
    Implements recurrent architectures from scratch.
    Designed as a TEACHING resource - every gate explained.
    Architectures included:
      - BasicRNN : simple recurrent cell, vanishing gradient problem
      - LSTM : Long Short-Term Memory, forget/input/output gates
      - GRU : Gated Recurrent Unit, simpler than LSTM
      - BidirectionalRNN : processes sequence forward AND backward
      - Seq2Seq : encoder-decoder for sequence translation
      - Attention : learn WHICH part of input to focus on
    Utilities:
      - Sequence datasets : sine wave, text, number sequences
      - Gradient clipping : prevent exploding gradients
      - Teacher forcing : seq2seq training trick
.NOTES
    Part of VBAF - Phase 6 Deep Learning Module
    PS 5.1 compatible - pure PowerShell, no dependencies
    Teaching project - every gate equation shown step by step!
#>

$basePath = $PSScriptRoot

# ============================================================
# TEACHING NOTE: Why Recurrent Networks?
# Standard networks treat each input INDEPENDENTLY.
# But sequences have CONTEXT - what came before matters!
#
# "The cat sat on the ___" -> "mat" (context from earlier words)
# Stock price tomorrow depends on prices OVER TIME
# Music note depends on what was played BEFORE
#
# RNNs maintain a HIDDEN STATE - a memory of past inputs.
# At each step: h_t = f(x_t, h_{t-1})
# The hidden state carries information forward through time.
#
# Problem: Basic RNNs suffer from VANISHING GRADIENTS.
# Information from many steps ago fades away.
# LSTM and GRU solve this with GATES that control memory flow.
# ============================================================

# ============================================================
# ACTIVATION FUNCTIONS
# ============================================================

function Invoke-RNNSigmoid { param([double[]]$x)
    # Element-wise logistic sigmoid: 1 / (1 + e^-x).
    # Input is clamped to [-500, 500] so [Math]::Exp never overflows to Infinity.
    # The cast + comma return guarantee a double[] even for a 1-element input
    # (a bare pipeline return would unroll a single element to a scalar).
    [double[]]$out = $x | ForEach-Object { 1.0 / (1.0 + [Math]::Exp(-[Math]::Max(-500, [Math]::Min(500, $_)))) }
    return ,$out
}

function Invoke-RNNTanh { param([double[]]$x)
    # Element-wise hyperbolic tangent via the identity tanh(x) = (e^2x - 1)/(e^2x + 1).
    # 2x is clamped to [-500, 500] (x to [-250, 250]) to keep Exp finite.
    # Cast + comma return guarantee a double[] even for a 1-element input.
    [double[]]$out = $x | ForEach-Object {
        $e2 = [Math]::Exp(2 * [Math]::Max(-250, [Math]::Min(250, $_)))
        ($e2 - 1) / ($e2 + 1)
    }
    return ,$out
}

function Invoke-RNNSoftmax { param([double[]]$x)
    # Numerically stable softmax: subtract the max before exponentiating
    # so the largest exponent is 0 and Exp cannot overflow.
    # Cast + comma return guarantee a double[] even for a 1-element input.
    $maxV = ($x | Measure-Object -Maximum).Maximum
    [double[]]$exps = $x | ForEach-Object { [Math]::Exp($_ - $maxV) }
    $sumE = ($exps | Measure-Object -Sum).Sum
    [double[]]$probs = $exps | ForEach-Object { $_ / $sumE }
    return ,$probs
}

# Vector operations
function Add-Vectors { param([double[]]$a, [double[]]$b)
    # Element-wise sum of two equal-length vectors (length taken from $a).
    $n   = $a.Length
    $out = New-Object double[] $n
    for ($idx = 0; $idx -lt $n; $idx++) { $out[$idx] = $a[$idx] + $b[$idx] }
    return $out
}

function Mul-Vectors { param([double[]]$a, [double[]]$b)
    # Element-wise (Hadamard) product of two equal-length vectors.
    $n   = $a.Length
    $out = New-Object double[] $n
    for ($idx = 0; $idx -lt $n; $idx++) { $out[$idx] = $a[$idx] * $b[$idx] }
    return $out
}

# Matrix-vector multiply: W (rows x cols) * x (cols) -> (rows)
function MatVec { param([double[]]$W, [double[]]$x, [int]$rows, [int]$cols)
    $r = @(0.0) * $rows
    for ($i = 0; $i -lt $rows; $i++) {
        $sum = 0.0
        for ($j = 0; $j -lt $cols; $j++) { $sum += $W[$i * $cols + $j] * $x[$j] }
        $r[$i] = $sum
    }
    return $r
}

# Random weight matrix initialization (Xavier)
function New-RNNWeights { param([int]$rows, [int]$cols, [int]$seed = 42)
    $rng   = [System.Random]::new($seed)
    $scale = [Math]::Sqrt(2.0 / ($rows + $cols))
    $W     = @(0.0) * ($rows * $cols)
    for ($i = 0; $i -lt $W.Length; $i++) {
        $W[$i] = ($rng.NextDouble() * 2 - 1) * $scale
    }
    return $W
}

# Gradient clipping - prevent exploding gradients
function Invoke-GradientClip { param([double[]]$grads, [double]$threshold = 1.0)
    # Clips by GLOBAL L2 norm: if ||grads|| > threshold, rescale the whole
    # vector so its norm equals the threshold; direction is preserved.
    # Comma returns guarantee a double[] result even for a 1-element input
    # (a bare return of a pipeline/array would unroll a single element to a scalar).
    $norm = 0.0
    foreach ($g in $grads) { $norm += $g * $g }
    $norm = [Math]::Sqrt($norm)
    if ($norm -gt $threshold) {
        $scale = $threshold / $norm
        [double[]]$clipped = $grads | ForEach-Object { $_ * $scale }
        return ,$clipped
    }
    return ,$grads
}

# ============================================================
# BASIC RNN CELL
# ============================================================
# TEACHING NOTE: The simplest recurrent cell.
# At each timestep t:
# h_t = tanh(W_xh * x_t + W_hh * h_{t-1} + b_h)
# y_t = W_hy * h_t + b_y
#
# W_xh : input -> hidden weights
# W_hh : hidden -> hidden weights (the recurrent connection!)
# W_hy : hidden -> output weights
#
# PROBLEM: tanh gradient < 1, so after many timesteps
# gradients shrink to zero = VANISHING GRADIENT.
# The network forgets events from many steps ago!
# ============================================================

class BasicRNNCell {
    # Simplest recurrent cell: h_t = tanh(Wxh*x_t + Whh*h_{t-1} + b_h).
    # Forward-pass only — no backprop is implemented in this class.
    [int]      $InputSize
    [int]      $HiddenSize
    [double[]] $Wxh        # input->hidden
    [double[]] $Whh        # hidden->hidden
    [double[]] $Bh         # hidden bias
    [double[]] $H          # current hidden state
    [System.Collections.ArrayList] $HHistory  # hidden states over time

    BasicRNNCell([int]$inputSize, [int]$hiddenSize) {
        $this.InputSize  = $inputSize
        $this.HiddenSize = $hiddenSize
        # Fixed seeds -> deterministic weights, reproducible teaching runs.
        $this.Wxh = New-RNNWeights -rows $hiddenSize -cols $inputSize  -seed 42
        $this.Whh = New-RNNWeights -rows $hiddenSize -cols $hiddenSize -seed 43
        $this.Bh  = @(0.0) * $hiddenSize
        $this.H   = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
    }

    # Zero the hidden state and clear the recorded history.
    [void] Reset() {
        $this.H = @(0.0) * $this.HiddenSize
        $this.HHistory.Clear()
    }

    # One step forward
    # Computes h_t = tanh(Wxh*x + Whh*h + b) and records a snapshot of h_t.
    [double[]] Step([double[]]$x) {
        $xh   = MatVec $this.Wxh $x  $this.HiddenSize $this.InputSize
        $hh   = MatVec $this.Whh $this.H $this.HiddenSize $this.HiddenSize
        $preact = Add-Vectors (Add-Vectors $xh $hh) $this.Bh
        $this.H = Invoke-RNNTanh $preact
        # Clone: defensive copy so the stored snapshot is independent of $this.H.
        $this.HHistory.Add($this.H.Clone()) | Out-Null
        return $this.H
    }

    # Process full sequence, return all hidden states
    # Resets state first, so each call is independent of previous sequences.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        $outputs = @()
        foreach ($x in $sequence) {
            $stepOut = $this.Step($x)
            $outputs += ,$stepOut   # unary comma keeps each hidden state as one element
        }
        return $outputs
    }

    # Pretty-print sizes and the trainable-parameter count to the console.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Basic RNN Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize)  -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        # Params = Wxh + Whh + bias (output projection lives in RNNOutputLayer).
        $params = $this.HiddenSize * $this.InputSize + $this.HiddenSize * $this.HiddenSize + $this.HiddenSize
        Write-Host ("║ Parameters : {0,-22}║" -f $params)          -ForegroundColor Yellow
        Write-Host ("║ Equation : h=tanh(Wx+Uh+b){0,-8}║" -f "") -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# LSTM CELL
# ============================================================
# TEACHING NOTE: LSTM solves the vanishing gradient problem!
# Key idea: a CELL STATE (c_t) acts as a "conveyor belt"
# carrying information through time with minimal modification.
#
# Three GATES control information flow:
#
# FORGET GATE: f_t = sigmoid(W_f * [h_{t-1}, x_t] + b_f)
# "How much of the old cell state do we keep?"
# f_t=0 : forget everything, f_t=1 : keep everything
#
# INPUT GATE: i_t = sigmoid(W_i * [h_{t-1}, x_t] + b_i)
# g_t = tanh(W_g * [h_{t-1}, x_t] + b_g)
# "What new information do we store in the cell state?"
#
# OUTPUT GATE: o_t = sigmoid(W_o * [h_{t-1}, x_t] + b_o)
# "What do we output based on the cell state?"
#
# CELL UPDATE: c_t = f_t * c_{t-1} + i_t * g_t
# HIDDEN STATE: h_t = o_t * tanh(c_t)
#
# The cell state highway lets gradients flow without vanishing!
# ============================================================

class LSTMCell {
    # LSTM forward pass (no backprop). See the teaching note above for the
    # gate equations; this class implements them literally, gate by gate.
    [int]      $InputSize
    [int]      $HiddenSize
    # Gate weights [hidden+input] -> hidden
    [double[]] $Wf   # forget gate
    [double[]] $Wi   # input gate
    [double[]] $Wg   # cell gate (candidate)
    [double[]] $Wo   # output gate
    [double[]] $Bf   # forget bias
    [double[]] $Bi   # input bias
    [double[]] $Bg   # cell bias
    [double[]] $Bo   # output bias
    [double[]] $H    # hidden state
    [double[]] $C    # cell state
    [System.Collections.ArrayList] $HHistory  # snapshot of H after each step
    [System.Collections.ArrayList] $CHistory  # snapshot of C after each step

    LSTMCell([int]$inputSize, [int]$hiddenSize) {
        $this.InputSize  = $inputSize
        $this.HiddenSize = $hiddenSize
        # Gates operate on the concatenation [h_{t-1}, x_t].
        $combined        = $inputSize + $hiddenSize

        # Each gate: (hidden+input) -> hidden
        $this.Wf = New-RNNWeights -rows $hiddenSize -cols $combined -seed 10
        $this.Wi = New-RNNWeights -rows $hiddenSize -cols $combined -seed 11
        $this.Wg = New-RNNWeights -rows $hiddenSize -cols $combined -seed 12
        $this.Wo = New-RNNWeights -rows $hiddenSize -cols $combined -seed 13
        $this.Bf = @(1.0) * $hiddenSize  # forget bias=1 helps remember by default
        $this.Bi = @(0.0) * $hiddenSize
        $this.Bg = @(0.0) * $hiddenSize
        $this.Bo = @(0.0) * $hiddenSize
        $this.H  = @(0.0) * $hiddenSize
        $this.C  = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
        $this.CHistory = [System.Collections.ArrayList]::new()
    }

    # Zero both states and clear the recorded histories.
    [void] Reset() {
        $this.H = @(0.0) * $this.HiddenSize
        $this.C = @(0.0) * $this.HiddenSize
        $this.HHistory.Clear()
        $this.CHistory.Clear()
    }

    # One timestep: computes all four gates, updates C and H, records snapshots.
    [double[]] Step([double[]]$x) {
        # Concatenate [h_{t-1}, x_t]
        $combined = $this.HiddenSize + $this.InputSize
        $hx = @(0.0) * $combined
        for ($i = 0; $i -lt $this.HiddenSize; $i++) { $hx[$i] = $this.H[$i] }
        for ($i = 0; $i -lt $this.InputSize;  $i++) { $hx[$this.HiddenSize + $i] = $x[$i] }

        # Gates
        # Pre-activations: W_* · [h, x] + b_*
        $fRaw = Add-Vectors (MatVec $this.Wf $hx $this.HiddenSize $combined) $this.Bf
        $iRaw = Add-Vectors (MatVec $this.Wi $hx $this.HiddenSize $combined) $this.Bi
        $gRaw = Add-Vectors (MatVec $this.Wg $hx $this.HiddenSize $combined) $this.Bg
        $oRaw = Add-Vectors (MatVec $this.Wo $hx $this.HiddenSize $combined) $this.Bo

        $f = Invoke-RNNSigmoid $fRaw  # forget gate
        $i = Invoke-RNNSigmoid $iRaw  # input gate
        $g = Invoke-RNNTanh    $gRaw  # candidate cell
        $o = Invoke-RNNSigmoid $oRaw  # output gate

        # Cell state update: c_t = f * c_{t-1} + i * g
        $this.C = Add-Vectors (Mul-Vectors $f $this.C) (Mul-Vectors $i $g)
        # Hidden state: h_t = o * tanh(c_t)
        $this.H = Mul-Vectors $o (Invoke-RNNTanh $this.C)

        # Clone: snapshots must stay independent of the live state arrays.
        $this.HHistory.Add($this.H.Clone()) | Out-Null
        $this.CHistory.Add($this.C.Clone()) | Out-Null
        return $this.H
    }

    # Run a full sequence from a fresh state; returns hidden state per timestep.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        $outputs = @()
        foreach ($x in $sequence) {
            $stepOut = $this.Step($x)
            $outputs += ,$stepOut
        }
        return $outputs
    }

    # Pretty-print sizes and the trainable-parameter count to the console.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ LSTM Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize)  -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        $combined = $this.InputSize + $this.HiddenSize
        # 4 gates, each with a (hidden x combined) weight matrix plus a bias.
        $params   = 4 * ($this.HiddenSize * $combined + $this.HiddenSize)
        Write-Host ("║ Parameters : {0,-22}║" -f $params)          -ForegroundColor Yellow
        Write-Host ("║ Gates : forget,input,cell,out{0,-1}║" -f "") -ForegroundColor DarkGray
        Write-Host ("║ Cell state : long-term memory{0,-6}║" -f "") -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }

    # NOTE: despite the name, this prints the hidden/cell STATES at a step;
    # the gate activations themselves are not stored and so cannot be shown.
    # Shows sign (+/-) and magnitude of the first 8 units of each state.
    [void] PrintGateActivity([int]$step) {
        if ($step -ge $this.HHistory.Count) { Write-Host "Step out of range" -ForegroundColor Red; return }
        $hState = $this.HHistory[$step]
        $cState = $this.CHistory[$step]
        Write-Host ""
        Write-Host ("🔦 LSTM Gate Activity at step {0}:" -f $step) -ForegroundColor Green
        Write-Host " Hidden state (h):" -ForegroundColor Cyan -NoNewline
        for ($i = 0; $i -lt [Math]::Min(8, $hState.Length); $i++) {
            $bar = if ($hState[$i] -gt 0) { "+" } else { "-" }
            Write-Host (" {0}{1:F2}" -f $bar, [Math]::Abs($hState[$i])) -ForegroundColor White -NoNewline
        }
        Write-Host ""
        Write-Host " Cell state (c):" -ForegroundColor Cyan -NoNewline
        for ($i = 0; $i -lt [Math]::Min(8, $cState.Length); $i++) {
            $bar = if ($cState[$i] -gt 0) { "+" } else { "-" }
            Write-Host (" {0}{1:F2}" -f $bar, [Math]::Abs($cState[$i])) -ForegroundColor Yellow -NoNewline
        }
        Write-Host ""
        Write-Host ""
    }
}

# ============================================================
# GRU CELL
# ============================================================
# TEACHING NOTE: GRU = Gated Recurrent Unit (2014)
# Simpler than LSTM - only 2 gates, no separate cell state.
# Often performs as well as LSTM with fewer parameters!
#
# RESET GATE: r_t = sigmoid(W_r * [h_{t-1}, x_t])
# "How much of past hidden state do we use?"
# r_t=0 : ignore past completely (start fresh)
#
# UPDATE GATE: z_t = sigmoid(W_z * [h_{t-1}, x_t])
# "How much do we update the hidden state?"
# z_t=0 : keep old state, z_t=1 : use new candidate
#
# CANDIDATE: h~_t = tanh(W * [r_t * h_{t-1}, x_t])
# HIDDEN: h_t = (1-z_t) * h_{t-1} + z_t * h~_t
#
# GRU vs LSTM:
# LSTM: 4 weight matrices, separate cell state
# GRU : 3 weight matrices, single hidden state
# Rule of thumb: try GRU first, use LSTM if more memory needed
# ============================================================

class GRUCell {
    # GRU forward pass (no backprop): reset/update gates, single hidden state.
    # See the teaching note above for the equations implemented here.
    [int]      $InputSize
    [int]      $HiddenSize
    [double[]] $Wr    # reset gate
    [double[]] $Wz    # update gate
    [double[]] $Wh    # candidate hidden
    [double[]] $Br    # reset bias
    [double[]] $Bz    # update bias
    [double[]] $Bh    # candidate bias
    [double[]] $H     # hidden state
    [System.Collections.ArrayList] $HHistory  # snapshot of H after each step

    GRUCell([int]$inputSize, [int]$hiddenSize) {
        $this.InputSize  = $inputSize
        $this.HiddenSize = $hiddenSize
        # Gates operate on the concatenation [h_{t-1}, x_t].
        $combined        = $inputSize + $hiddenSize

        $this.Wr = New-RNNWeights -rows $hiddenSize -cols $combined -seed 20
        $this.Wz = New-RNNWeights -rows $hiddenSize -cols $combined -seed 21
        $this.Wh = New-RNNWeights -rows $hiddenSize -cols $combined -seed 22
        $this.Br = @(0.0) * $hiddenSize
        $this.Bz = @(0.0) * $hiddenSize
        $this.Bh = @(0.0) * $hiddenSize
        $this.H  = @(0.0) * $hiddenSize
        $this.HHistory = [System.Collections.ArrayList]::new()
    }

    # Zero the hidden state and clear the recorded history.
    [void] Reset() {
        $this.H = @(0.0) * $this.HiddenSize
        $this.HHistory.Clear()
    }

    # One timestep: r/z gates, candidate from [r*h, x], convex-blend update.
    [double[]] Step([double[]]$x) {
        $combined = $this.HiddenSize + $this.InputSize
        # Concatenate [h, x]
        $hx = @(0.0) * $combined
        for ($i = 0; $i -lt $this.HiddenSize; $i++) { $hx[$i] = $this.H[$i] }
        for ($i = 0; $i -lt $this.InputSize;  $i++) { $hx[$this.HiddenSize + $i] = $x[$i] }

        # Reset and update gates
        $r = Invoke-RNNSigmoid (Add-Vectors (MatVec $this.Wr $hx $this.HiddenSize $combined) $this.Br)
        $z = Invoke-RNNSigmoid (Add-Vectors (MatVec $this.Wz $hx $this.HiddenSize $combined) $this.Bz)

        # Candidate: [r * h, x]
        # Reset gate scales the old state BEFORE it feeds the candidate.
        $rh = Mul-Vectors $r $this.H
        $rhx = @(0.0) * $combined
        for ($i = 0; $i -lt $this.HiddenSize; $i++) { $rhx[$i] = $rh[$i] }
        for ($i = 0; $i -lt $this.InputSize;  $i++) { $rhx[$this.HiddenSize + $i] = $x[$i] }
        $hCand = Invoke-RNNTanh (Add-Vectors (MatVec $this.Wh $rhx $this.HiddenSize $combined) $this.Bh)

        # Update: h_t = (1-z)*h + z*h_cand
        # Convex combination: z interpolates between keeping and replacing.
        $newH = @(0.0) * $this.HiddenSize
        for ($i = 0; $i -lt $this.HiddenSize; $i++) {
            $newH[$i] = (1 - $z[$i]) * $this.H[$i] + $z[$i] * $hCand[$i]
        }
        $this.H = $newH
        # Clone: defensive copy so the snapshot stays independent.
        $this.HHistory.Add($this.H.Clone()) | Out-Null
        return $this.H
    }

    # Run a full sequence from a fresh state; returns hidden state per timestep.
    [double[][]] Forward([double[][]]$sequence) {
        $this.Reset()
        $outputs = @()
        foreach ($x in $sequence) { $outputs += ,$this.Step($x) }
        return $outputs
    }

    # Pretty-print sizes and the trainable-parameter count to the console.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ GRU Cell ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize)  -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize) -ForegroundColor White
        $combined = $this.InputSize + $this.HiddenSize
        # 3 gate matrices plus biases (LSTM has 4 -> "25% fewer params").
        $params   = 3 * ($this.HiddenSize * $combined + $this.HiddenSize)
        Write-Host ("║ Parameters : {0,-22}║" -f $params)          -ForegroundColor Yellow
        Write-Host ("║ Gates : reset, update{0,-9}║" -f "")   -ForegroundColor DarkGray
        Write-Host ("║ vs LSTM : 25% fewer params{0,-5}║" -f "") -ForegroundColor Green
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# BIDIRECTIONAL RNN WRAPPER
# ============================================================
# TEACHING NOTE: Standard RNN only sees the PAST.
# Bidirectional processes the sequence BOTH ways:
# Forward pass: x_1 -> x_2 -> x_3 ... x_T
# Backward pass: x_T -> x_{T-1} ... x_1
# Then concatenates both hidden states at each timestep.
#
# Why? Some tasks need future context too!
# "The bank was steep" vs "The bank was closed"
# The word "bank" meaning depends on what comes AFTER it!
# ============================================================

class BidirectionalRNN {
    # Wraps two independent cells of the same type: one reads the sequence
    # left-to-right, the other right-to-left; outputs are concatenated per step.
    [object] $ForwardCell
    [object] $BackwardCell
    [string] $CellType     # "RNN", "LSTM", "GRU"
    [int]    $InputSize
    [int]    $HiddenSize

    BidirectionalRNN([string]$cellType, [int]$inputSize, [int]$hiddenSize) {
        $this.CellType   = $cellType
        $this.InputSize  = $inputSize
        $this.HiddenSize = $hiddenSize

        # Any cell type other than LSTM/GRU falls through to BasicRNNCell.
        switch ($cellType) {
            "LSTM" {
                $this.ForwardCell  = [LSTMCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [LSTMCell]::new($inputSize, $hiddenSize)
            }
            "GRU" {
                $this.ForwardCell  = [GRUCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [GRUCell]::new($inputSize, $hiddenSize)
            }
            default {
                $this.ForwardCell  = [BasicRNNCell]::new($inputSize, $hiddenSize)
                $this.BackwardCell = [BasicRNNCell]::new($inputSize, $hiddenSize)
            }
        }
    }

    # Returns concatenated [forward, backward] at each timestep
    # Output vectors have length 2 * HiddenSize.
    [double[][]] Forward([double[][]]$sequence) {
        $n = $sequence.Length

        # Forward pass
        # (each cell's Forward resets its own state first)
        $fwdOutputs = $this.ForwardCell.Forward($sequence)

        # Backward pass (reverse sequence)
        $revSeq = @()
        for ($i = $n-1; $i -ge 0; $i--) { $revSeq += ,$sequence[$i] }
        $bwdOutputsRev = $this.BackwardCell.Forward($revSeq)

        # Reverse backward outputs to align with original positions
        $bwdOutputs = @()
        for ($i = $n-1; $i -ge 0; $i--) { $bwdOutputs += ,$bwdOutputsRev[$i] }

        # Concatenate at each position
        $combined = @()
        for ($i = 0; $i -lt $n; $i++) {
            $fwd  = $fwdOutputs[$i]
            $bwd  = $bwdOutputs[$i]
            $cat  = @(0.0) * ($fwd.Length + $bwd.Length)
            for ($j = 0; $j -lt $fwd.Length; $j++) { $cat[$j] = $fwd[$j] }
            for ($j = 0; $j -lt $bwd.Length; $j++) { $cat[$fwd.Length + $j] = $bwd[$j] }
            $combined += ,$cat
        }
        return $combined
    }

    # Pretty-print configuration to the console.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Bidirectional RNN ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Cell type : {0,-22}║" -f $this.CellType)          -ForegroundColor White
        Write-Host ("║ Input size : {0,-22}║" -f $this.InputSize)         -ForegroundColor White
        Write-Host ("║ Hidden size : {0,-22}║" -f $this.HiddenSize)        -ForegroundColor White
        Write-Host ("║ Output size : {0,-22}║" -f ($this.HiddenSize * 2))  -ForegroundColor Yellow
        Write-Host ("║ Direction : forward + backward{0,-3}║" -f "")     -ForegroundColor DarkGray
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# ATTENTION MECHANISM
# ============================================================
# TEACHING NOTE: Attention = "Where should I look?"
# Instead of compressing the WHOLE sequence into one vector,
# attention lets the model focus on RELEVANT parts.
#
# For each output step, compute a weight for each input step:
# score(q, k) = q · k (dot product attention)
# weights = softmax(scores)
# context = sum(weights * values)
#
# Q = Query : what we're looking for
# K = Keys : what each encoder step "offers"
# V = Values : what we actually read when we attend
#
# This is the foundation of TRANSFORMERS!
# "Attention is All You Need" (2017) revolutionized NLP.
# ============================================================

class DotProductAttention {
    # Scaled dot-product attention (no learned projections — the raw
    # query/keys/values are used directly; scaling by sqrt(d) as in
    # "Attention is All You Need").
    [double[]] $AttentionWeights  # last computed weights (softmax over timesteps)

    DotProductAttention() {}

    # query: (d,) keys: (T, d) values: (T, dv)
    # Returns the context vector: the attention-weighted sum of the values.
    # Side effect: stores the softmax weights in $AttentionWeights so
    # PrintAttentionMap can visualize the last call.
    [double[]] Forward([double[]]$query, [double[][]]$keys, [double[][]]$values) {
        $T = $keys.Length
        $d = $query.Length

        # Score = query · key_t for each t
        $scores = @(0.0) * $T
        for ($t = 0; $t -lt $T; $t++) {
            $dot = 0.0
            for ($i = 0; $i -lt $d; $i++) { $dot += $query[$i] * $keys[$t][$i] }
            $scores[$t] = $dot / [Math]::Sqrt($d)  # scale by sqrt(d)
        }

        # Attention weights = softmax(scores)
        $this.AttentionWeights = Invoke-RNNSoftmax $scores

        # Context = weighted sum of values
        $dv      = $values[0].Length
        $context = @(0.0) * $dv
        for ($t = 0; $t -lt $T; $t++) {
            for ($i = 0; $i -lt $dv; $i++) {
                $context[$i] += $this.AttentionWeights[$t] * $values[$t][$i]
            }
        }
        return $context
    }

    # Visualize the weights from the last Forward call as labeled bar rows.
    # Tokens beyond the label list fall back to a generic "t<index>" label.
    [void] PrintAttentionMap([string[]]$tokens) {
        Write-Host ""
        Write-Host "🔍 Attention Weights:" -ForegroundColor Green
        for ($t = 0; $t -lt $this.AttentionWeights.Length; $t++) {
            $w     = $this.AttentionWeights[$t]
            $barN  = [int]($w * 20)   # bar length: 20 chars == weight 1.0
            $bar   = "█" * $barN
            $tok   = if ($t -lt $tokens.Length) { $tokens[$t] } else { "t$t" }
            $color = if ($w -gt 0.3) { "Green" } elseif ($w -gt 0.1) { "Yellow" } else { "DarkGray" }
            Write-Host (" {0,-12} {1,6:F4} {2}" -f $tok, $w, $bar) -ForegroundColor $color
        }
        Write-Host ""
    }
}

# ============================================================
# SEQ2SEQ MODEL (Encoder-Decoder)
# ============================================================
# TEACHING NOTE: Seq2Seq translates one sequence to another.
# Examples: English -> French, Question -> Answer
#
# ENCODER: reads the input sequence, produces a context vector
# "Summarise the input into a fixed-size memory"
#
# DECODER: generates the output sequence from context
# "Expand the memory into the output sequence"
#
# TEACHER FORCING: during training, feed the CORRECT previous
# output as the next decoder input (not the predicted one).
# This speeds up training but can cause "exposure bias" at test.
# ============================================================

class Seq2SeqModel {
    # Encoder-decoder: encoder LSTM compresses the input sequence into
    # its final (H, C) state; decoder LSTM expands that state into outputs.
    [LSTMCell] $Encoder
    [LSTMCell] $Decoder
    [double[]] $Wy       # decoder hidden -> output
    [double[]] $By       # output bias
    [int]      $OutputSize
    # NOTE(review): $Attention is constructed but never used in Decode below —
    # presumably reserved for a future attention-augmented decoder; confirm.
    [DotProductAttention] $Attention

    Seq2SeqModel([int]$inputSize, [int]$hiddenSize, [int]$outputSize) {
        $this.Encoder    = [LSTMCell]::new($inputSize,  $hiddenSize)
        # Decoder's input size equals the output size: it consumes its own
        # previous output distribution as the next input.
        $this.Decoder    = [LSTMCell]::new($outputSize, $hiddenSize)
        $this.Wy         = New-RNNWeights -rows $outputSize -cols $hiddenSize -seed 99
        $this.By         = @(0.0) * $outputSize
        $this.OutputSize = $outputSize
        $this.Attention  = [DotProductAttention]::new()
    }

    # Encode input sequence -> final hidden state
    # Returns clones of the encoder's final H and C (the "context").
    [hashtable] Encode([double[][]]$inputSeq) {
        $this.Encoder.Reset()
        foreach ($x in $inputSeq) { $this.Encoder.Step($x) | Out-Null }
        return @{ H=$this.Encoder.H.Clone(); C=$this.Encoder.C.Clone() }
    }

    # Decode: generate output sequence of given length
    # Each step: LSTM step -> linear projection -> softmax; the softmax
    # distribution is fed back as the next decoder input (greedy-free,
    # no sampling, no teacher forcing).
    [double[][]] Decode([hashtable]$context, [int]$outputLen) {
        # Initialize decoder with encoder final state
        $this.Decoder.H = $context.H
        $this.Decoder.C = $context.C

        $outputs = @()
        $input   = @(0.0) * $this.OutputSize  # start token = zeros

        for ($t = 0; $t -lt $outputLen; $t++) {
            $decOut = $this.Decoder.Step($input)
            $out = Add-Vectors (MatVec $this.Wy $decOut $this.OutputSize $this.Decoder.HiddenSize) $this.By
            $prob = Invoke-RNNSoftmax $out
            $outputs += ,$prob
            $input = $prob  # feed output as next input (no teacher forcing at inference)
        }
        return $outputs
    }

    # Pretty-print the encoder/decoder configuration to the console.
    [void] PrintSummary() {
        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Seq2Seq Model ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Encoder : LSTM({0}->{1}){2,-12}║" -f $this.Encoder.InputSize, $this.Encoder.HiddenSize, "") -ForegroundColor White
        Write-Host ("║ Decoder : LSTM({0}->{1}){2,-12}║" -f $this.Decoder.InputSize, $this.Decoder.HiddenSize, "") -ForegroundColor White
        Write-Host ("║ Output size : {0,-22}║" -f $this.OutputSize) -ForegroundColor White
        Write-Host ("║ Attention : DotProduct{0,-12}║" -f "")     -ForegroundColor Yellow
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }
}

# ============================================================
# OUTPUT LAYER FOR SEQUENCE CLASSIFICATION/REGRESSION
# ============================================================

class RNNOutputLayer {
    # Final projection layer: maps a hidden-state vector to the task's output
    # space, with a configurable activation ("softmax", "sigmoid", "tanh",
    # or anything else for a raw linear output).
    [double[]] $W
    [double[]] $B
    [int]      $InputSize
    [int]      $OutputSize
    [string]   $Activation

    RNNOutputLayer([int]$inputSize, [int]$outputSize, [string]$activation) {
        $this.InputSize  = $inputSize
        $this.OutputSize = $outputSize
        $this.Activation = $activation
        $this.W = New-RNNWeights -rows $outputSize -cols $inputSize -seed 77
        $this.B = @(0.0) * $outputSize
    }

    # Linear projection W·h + b followed by the configured activation.
    [double[]] Forward([double[]]$h) {
        $pre = Add-Vectors (MatVec $this.W $h $this.OutputSize $this.InputSize) $this.B
        if ($this.Activation -eq "softmax") { return Invoke-RNNSoftmax $pre }
        if ($this.Activation -eq "sigmoid") { return Invoke-RNNSigmoid $pre }
        if ($this.Activation -eq "tanh")    { return Invoke-RNNTanh    $pre }
        return $pre   # unrecognized activation -> raw linear output
    }
}

# ============================================================
# BUILT-IN DATASETS
# ============================================================

function Get-VBAFSequenceDataset {
    # Builds one of three small, deterministic (seed 42) teaching datasets.
    # Returns a hashtable whose keys vary by task:
    #   SineWave       -> Sequences, Targets, SeqLen, InputSize, Task="regression"
    #   BinaryAdd      -> Sequences, Targets, SeqLen, InputSize, Task="seq2seq"
    #   SentimentWords -> Sequences, Labels, InputSize, Task="classification"
    # Unknown names print the available options and return $null.
    param([string]$Name = "SineWave")

    $rng = [System.Random]::new(42)

    switch ($Name) {
        "SineWave" {
            Write-Host "📊 Dataset: SineWave (predict next value)" -ForegroundColor Cyan
            Write-Host " Task: given 10 values, predict the 11th" -ForegroundColor Cyan

            $n        = 200
            $seqLen   = 10
            $step     = 0.1
            $values   = @()
            # Noisy sine: sin(i*0.1) plus uniform noise in [-0.05, 0.05].
            for ($i = 0; $i -lt ($n + $seqLen + 1); $i++) {
                $values += [Math]::Sin($i * $step) + ($rng.NextDouble() - 0.5) * 0.1
            }

            # Sliding windows: each sequence is 10 consecutive values,
            # the target is the value immediately after the window.
            $sequences = @(); $targets = @()
            for ($i = 0; $i -lt $n; $i++) {
                $seq = @()
                for ($j = 0; $j -lt $seqLen; $j++) {
                    $seq += ,@($values[$i + $j])  # single feature
                }
                $sequences += ,$seq
                $targets   += $values[$i + $seqLen]
            }
            return @{ Sequences=$sequences; Targets=$targets; SeqLen=$seqLen; InputSize=1; Task="regression" }
        }
        "BinaryAdd" {
            Write-Host "📊 Dataset: BinaryAdd (seq2seq)" -ForegroundColor Cyan
            Write-Host " Task: add two 4-bit binary numbers -> 5-bit result" -ForegroundColor Cyan

            $seqs   = @(); $targets = @()
            for ($i = 0; $i -lt 50; $i++) {
                $a  = $rng.Next(0, 16)
                $b  = $rng.Next(0, 16)
                $cSum = $a + $b
                # Encode as bit sequences (LSB first)
                # Input at each step is the bit pair (a_bit, b_bit);
                # target is the 5-bit sum (one extra bit for the carry).
                $aSeq = @(); $bSeq = @(); $cSeq = @()
                for ($bit = 0; $bit -lt 4; $bit++) {
                    $aSeq += ,@([double](($a -shr $bit) -band 1), [double](($b -shr $bit) -band 1))
                }
                for ($bit = 0; $bit -lt 5; $bit++) {
                    $cSeq += ,@([double](($cSum -shr $bit) -band 1))
                }
                $seqs   += ,$aSeq
                $targets += ,$cSeq
            }
            return @{ Sequences=$seqs; Targets=$targets; SeqLen=4; InputSize=2; Task="seq2seq" }
        }
        "SentimentWords" {
            Write-Host "📊 Dataset: SentimentWords (sequence classification)" -ForegroundColor Cyan
            Write-Host " Task: classify word sequence as positive/negative" -ForegroundColor Cyan

            # Simple word embeddings (2D for teaching)
            # Roughly: first dim = positivity, second = negativity; neutral
            # words sit at (0.5, 0.5).
            $vocab = @{
                "good"=@(0.8,0.2); "great"=@(0.9,0.1); "excellent"=@(1.0,0.0)
                "happy"=@(0.7,0.3); "love"=@(0.85,0.15); "wonderful"=@(0.95,0.05)
                "bad"=@(0.1,0.9);   "terrible"=@(0.0,1.0); "awful"=@(0.05,0.95)
                "sad"=@(0.2,0.8);   "hate"=@(0.1,0.9);  "horrible"=@(0.0,0.95)
                "movie"=@(0.5,0.5); "film"=@(0.5,0.5);  "was"=@(0.5,0.5)
            }

            $posSentences = @(
                @("the","movie","was","great"),
                @("wonderful","film"),
                @("great","excellent","good"),
                @("love","this","film"),
                @("happy","wonderful","movie")
            )
            $negSentences = @(
                @("the","movie","was","terrible"),
                @("horrible","film"),
                @("bad","awful","sad"),
                @("hate","this","film"),
                @("sad","terrible","movie")
            )

            # Out-of-vocabulary words (e.g. "the", "this") map to the neutral
            # embedding (0.5, 0.5) — deliberate: unknowns carry no sentiment.
            $seqs = @(); $labels = @()
            foreach ($sent in $posSentences) {
                $seq = @()
                foreach ($w in $sent) {
                    $emb = if ($vocab.ContainsKey($w)) { $vocab[$w] } else { @(0.5, 0.5) }
                    $seq += ,@($emb[0], $emb[1])
                }
                $seqs  += ,$seq
                $labels += 1  # positive
            }
            foreach ($sent in $negSentences) {
                $seq = @()
                foreach ($w in $sent) {
                    $emb = if ($vocab.ContainsKey($w)) { $vocab[$w] } else { @(0.5, 0.5) }
                    $seq += ,@($emb[0], $emb[1])
                }
                $seqs  += ,$seq
                $labels += 0  # negative
            }
            return @{ Sequences=$seqs; Labels=$labels; InputSize=2; Task="classification" }
        }
        default {
            Write-Host "❌ Unknown: $Name" -ForegroundColor Red
            Write-Host " Available: SineWave, BinaryAdd, SentimentWords" -ForegroundColor Yellow
            return $null
        }
    }
}

# ============================================================
# ARCHITECTURE COMPARISON UTILITY
# ============================================================

function Compare-RNNArchitectures {
    <#
    .SYNOPSIS
        Runs one forward pass of each recurrent architecture over a sequence
        and prints a side-by-side table of parameter counts and output sizes.
    .DESCRIPTION
        Teaching utility: instantiates BasicRNN, LSTM, GRU and a bidirectional
        LSTM (hidden size 8), feeds the same sequence through each, and shows
        the standard parameter-count formulas:
          BasicRNN : H*I + H*H + H
          LSTM     : 4 * (H*(I+H) + H)   - forget/input/candidate/output gates
          GRU      : 3 * (H*(I+H) + H)   - update/reset/candidate gates
        Requires the cell classes from this module to be loaded first.
    .PARAMETER sequence
        Array of time steps; each step is a double[] feature vector.
        All steps are assumed to have the same length (the input size).
    #>
    param([double[][]]$sequence)

    # Guard: an empty/null sequence would crash on $sequence[0] below.
    if ($null -eq $sequence -or $sequence.Length -eq 0) {
        Write-Host "❌ Compare-RNNArchitectures: sequence must contain at least one step" -ForegroundColor Red
        return
    }

    $n = $sequence.Length
    Write-Host ""
    Write-Host "⚖️ RNN Architecture Comparison" -ForegroundColor Green
    Write-Host (" Sequence length : {0}" -f $n)      -ForegroundColor Cyan
    Write-Host (" Input size : {0}" -f $sequence[0].Length) -ForegroundColor Cyan
    Write-Host ""

    $hiddenSize = 8
    $inputSize  = $sequence[0].Length

    # BasicRNN: one weight matrix per input, one per recurrent state, plus bias.
    $rnn     = [BasicRNNCell]::new($inputSize, $hiddenSize)
    $rnnOut  = $rnn.Forward($sequence)
    $rnnParams = $hiddenSize * $inputSize + $hiddenSize * $hiddenSize + $hiddenSize

    # LSTM: 4 gates, each operating on [input ; hidden] concatenation.
    $lstm    = [LSTMCell]::new($inputSize, $hiddenSize)
    $lstmOut = $lstm.Forward($sequence)
    $combined = $inputSize + $hiddenSize
    $lstmParams = 4 * ($hiddenSize * $combined + $hiddenSize)

    # GRU: same gating idea with only 3 gates -> ~25% fewer params than LSTM.
    $gru     = [GRUCell]::new($inputSize, $hiddenSize)
    $gruOut  = $gru.Forward($sequence)
    $gruParams = 3 * ($hiddenSize * $combined + $hiddenSize)

    # Bidirectional LSTM: forward + backward pass, outputs are concatenated.
    $biLSTM  = [BidirectionalRNN]::new("LSTM", $inputSize, $hiddenSize)
    $biOut   = $biLSTM.Forward($sequence)

    # Output sizes are measured from the actual forward-pass results rather
    # than assumed, so the table can't drift from the implementation.
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "Architecture", "Params", "Output Size", "Memory") -ForegroundColor Yellow
    Write-Host (" {0}" -f ("-" * 55)) -ForegroundColor DarkGray
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "BasicRNN",      $rnnParams,        $rnnOut[0].Length,  "Short") -ForegroundColor White
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "LSTM",          $lstmParams,       $lstmOut[0].Length, "Long")  -ForegroundColor Green
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "GRU",           $gruParams,        $gruOut[0].Length,  "Medium") -ForegroundColor Cyan
    Write-Host (" {0,-20} {1,8} {2,12} {3,10}" -f "Bidirect-LSTM", ($lstmParams * 2), $biOut[0].Length,   "Long+Context") -ForegroundColor Yellow
    Write-Host ""
    Write-Host " 💡 Rule of thumb:" -ForegroundColor DarkGray
    Write-Host " Short sequences -> BasicRNN or GRU" -ForegroundColor DarkGray
    Write-Host " Long sequences -> LSTM" -ForegroundColor DarkGray
    Write-Host " Need context -> Bidirectional" -ForegroundColor DarkGray
    Write-Host " Seq translation -> Seq2Seq + Attention" -ForegroundColor DarkGray
    Write-Host ""
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- BasicRNN forward pass ---
# 2. $rnn = [BasicRNNCell]::new(1, 8)
# $rnn.PrintSummary()
# $data = Get-VBAFSequenceDataset -Name "SineWave"
# $out = $rnn.Forward($data.Sequences[0])
# Write-Host "Processed $($data.Sequences[0].Length) steps, output size: $($out[0].Length)"
#
# --- LSTM forward pass ---
# 3. $lstm = [LSTMCell]::new(1, 8)
# $lstm.PrintSummary()
# $out2 = $lstm.Forward($data.Sequences[0])
# $lstm.PrintGateActivity(5) # show gates at step 5
#
# --- GRU forward pass ---
# 4. $gru = [GRUCell]::new(1, 8)
# $gru.PrintSummary()
# $out3 = $gru.Forward($data.Sequences[0])
#
# --- Bidirectional ---
# 5. $bi = [BidirectionalRNN]::new("LSTM", 1, 8)
# $bi.PrintSummary()
# $biOut = $bi.Forward($data.Sequences[0])
# Write-Host "Bidirectional output size: $($biOut[0].Length) (should be 16 = 8*2)"
#
# --- Attention ---
# 6. $attn = [DotProductAttention]::new()
# $keys = $out2 # LSTM hidden states as keys
# $query = $out2[-1] # last hidden state as query
# $context = $attn.Forward($query, $keys, $keys)
# $attn.PrintAttentionMap(@("t0","t1","t2","t3","t4","t5","t6","t7","t8","t9"))
#
# --- Architecture comparison ---
# 7. Compare-RNNArchitectures -sequence $data.Sequences[0]
#
# --- Seq2Seq ---
# 8. $s2s = [Seq2SeqModel]::new(2, 8, 1)
# $s2s.PrintSummary()
# $binData = Get-VBAFSequenceDataset -Name "BinaryAdd"
# $ctx = $s2s.Encode($binData.Sequences[0])
# $decoded = $s2s.Decode($ctx, 5)
# Write-Host "Encoded and decoded binary addition sequence"
#
# --- Sentiment classification ---
# 9. $sentData = Get-VBAFSequenceDataset -Name "SentimentWords"
# $lstm2 = [LSTMCell]::new(2, 8)
# $outLayer = [RNNOutputLayer]::new(8, 2, "softmax")
# $correct = 0
# for ($i = 0; $i -lt $sentData.Sequences.Length; $i++) {
# $hiddens = $lstm2.Forward($sentData.Sequences[$i])
# $probs = $outLayer.Forward($hiddens[-1])
# $pred = if ($probs[0] -gt $probs[1]) { 0 } else { 1 }
# if ($pred -eq $sentData.Labels[$i]) { $correct++ }
# }
# Write-Host "Sentiment accuracy (untrained): $correct / $($sentData.Sequences.Length)"
# ============================================================
# ------------------------------------------------------------
# Load banner: confirm the module loaded and list its exports.
# ------------------------------------------------------------
Write-Host "📦 VBAF.ML.RNN.ps1 loaded [Phase 6 🧠]" -ForegroundColor Green

# Exported classes and functions, printed one per line in Cyan.
$moduleExports = @(
    " Classes : BasicRNNCell",
    " LSTMCell",
    " GRUCell",
    " BidirectionalRNN",
    " DotProductAttention",
    " Seq2SeqModel",
    " RNNOutputLayer",
    " Functions : Compare-RNNArchitectures",
    " Get-VBAFSequenceDataset",
    " Invoke-GradientClip"
)
foreach ($exportLine in $moduleExports) {
    Write-Host $exportLine -ForegroundColor Cyan
}
Write-Host ""

# Minimal copy-paste snippet showing a full LSTM forward pass.
Write-Host " Quick start:" -ForegroundColor Yellow
$quickStartLines = @(
    ' $lstm = [LSTMCell]::new(1, 8)',
    ' $lstm.PrintSummary()',
    ' $data = Get-VBAFSequenceDataset -Name "SineWave"',
    ' $out = $lstm.Forward($data.Sequences[0])',
    ' Write-Host "Steps: $($out.Length) Hidden: $($out[0].Length)"'
)
foreach ($snippetLine in $quickStartLines) {
    Write-Host $snippetLine -ForegroundColor White
}
Write-Host ""