# VBAF.ML.TimeSeries.ps1

#Requires -Version 5.1
<#
.SYNOPSIS
    Time Series - Datetime Processing and Feature Engineering
.DESCRIPTION
    Implements time series processing from scratch.
    Designed as a TEACHING resource - every step explained.
    Features included:
      - Datetime parsing : flexible format detection
      - Lag features : yesterday's value as today's feature
      - Rolling windows : moving average, std, min, max
      - Seasonal decomposition: trend + seasonal + residual
      - Resampling : daily -> weekly -> monthly aggregation
      - Built-in datasets : synthetic sales and temperature series
    Standalone - no external VBAF dependencies required.
.NOTES
    Part of VBAF - Phase 5 Time Series Module
    PS 5.1 compatible
    Teaching project - every time series concept explained!
#>

# Directory that contains this script file, taken from the automatic
# variable $PSScriptRoot.
# NOTE(review): $basePath is not read anywhere in the visible part of this
# file - presumably other VBAF scripts rely on it; confirm before removing.
$basePath = $PSScriptRoot

# ============================================================
# TEACHING NOTE: What is Time Series data?
# Data where ORDER MATTERS - each point depends on time.
# Examples: stock prices, temperature, sales, heart rate.
#
# Key concepts:
# Trend : long-term direction (going up/down over years)
# Seasonality: repeating pattern (sales up every December)
# Noise : random fluctuation around the pattern
# Lag : past values used to predict future values
# Window : a sliding "view" over recent history
# ============================================================

# ============================================================
# TIME SERIES DATA STRUCTURE
# ============================================================

# TimeSeries: an ordered set of observations stored as two parallel arrays
# (timestamps and values) plus a display name and a frequency label.
# Provides a boxed text summary and an ASCII sparkline plot, both written
# to the host with Write-Host.
class TimeSeries {
    [datetime[]] $Timestamps  # observation times, parallel to $Values
    [double[]]   $Values      # observed values, parallel to $Timestamps
    [string]     $Name        # label shown in summaries and plots
    [string]     $Frequency   # frequency label; the constructor sets "daily"

    # Stores the arrays as given (no copy, no length validation).
    TimeSeries([datetime[]]$timestamps, [double[]]$values, [string]$name) {
        $this.Timestamps = $timestamps
        $this.Values     = $values
        $this.Name       = $name
        $this.Frequency  = "daily"
    }

    # Number of observations (length of the value array).
    [int] Length() { return $this.Values.Length }

    # Prints name, point count, first/last date and min/max/mean in a box.
    # An empty series prints "n/a" for dates and statistics instead of
    # failing on a method call against a missing first timestamp.
    [void] PrintSummary() {
        $count = $this.Length()

        $startText = "n/a"
        $endText   = "n/a"
        if ($null -ne $this.Timestamps -and $this.Timestamps.Length -gt 0) {
            $startText = $this.Timestamps[0].ToString("yyyy-MM-dd")
            $endText   = $this.Timestamps[-1].ToString("yyyy-MM-dd")
        }

        $minText  = "n/a"
        $maxText  = "n/a"
        $meanText = "n/a"
        if ($count -gt 0) {
            # One pass over the values instead of three separate pipelines.
            $stats    = $this.Values | Measure-Object -Minimum -Maximum -Average
            $minText  = [Math]::Round($stats.Minimum, 2)
            $maxText  = [Math]::Round($stats.Maximum, 2)
            $meanText = [Math]::Round($stats.Average, 2)
        }

        Write-Host ""
        Write-Host "╔══════════════════════════════════════╗" -ForegroundColor Cyan
        Write-Host "║ Time Series Summary ║" -ForegroundColor Cyan
        Write-Host "╠══════════════════════════════════════╣" -ForegroundColor Cyan
        Write-Host ("║ Name : {0,-24}║" -f $this.Name) -ForegroundColor White
        Write-Host ("║ Points : {0,-24}║" -f $count)    -ForegroundColor White
        Write-Host ("║ Start : {0,-24}║" -f $startText) -ForegroundColor White
        Write-Host ("║ End : {0,-24}║" -f $endText)     -ForegroundColor White
        Write-Host ("║ Min : {0,-24}║" -f $minText)     -ForegroundColor White
        Write-Host ("║ Max : {0,-24}║" -f $maxText)     -ForegroundColor White
        Write-Host ("║ Mean : {0,-24}║" -f $meanText)   -ForegroundColor White
        Write-Host "╚══════════════════════════════════════╝" -ForegroundColor Cyan
        Write-Host ""
    }

    # ASCII sparkline visualization.
    # $width is the maximum number of bar characters to draw; values below 1
    # are treated as 1. Every $step-th point is drawn, scaled into one of
    # eight bar glyphs between the series minimum and maximum.
    [void] Plot([int]$width) {
        $n = $this.Length()

        Write-Host ""
        Write-Host ("📈 {0}" -f $this.Name) -ForegroundColor Green
        if ($n -eq 0) {
            Write-Host " (no data points)" -ForegroundColor DarkGray
            Write-Host ""
            return
        }

        # Ceiling division guarantees at most $columns bars. An [int] cast of
        # the quotient would round (150 / 60 = 2.5 -> 2) and overshoot the
        # requested width, and a width of 0 would divide by zero.
        $columns = [Math]::Max(1, $width)
        $step    = [int][Math]::Ceiling($n / [double]$columns)

        $stats = $this.Values | Measure-Object -Minimum -Maximum -Average
        $min   = $stats.Minimum
        $max   = $stats.Maximum
        $range = [Math]::Max($max - $min, 1e-8)   # avoid 0-division on flat series
        $bars  = "▁▂▃▄▅▆▇█"

        Write-Host (" [{0:yyyy-MM-dd} → {1:yyyy-MM-dd}]" -f $this.Timestamps[0], $this.Timestamps[-1]) -ForegroundColor DarkGray

        $line = " "
        for ($i = 0; $i -lt $n; $i += $step) {
            $normalized = ($this.Values[$i] - $min) / $range
            $barIdx     = [int]($normalized * 7)
            $barIdx     = [Math]::Max(0, [Math]::Min(7, $barIdx))
            $line      += $bars[$barIdx]
        }
        Write-Host $line -ForegroundColor Cyan
        Write-Host (" min={0:F1} max={1:F1} mean={2:F1}" -f $min, $max, $stats.Average) -ForegroundColor DarkGray
        Write-Host ""
    }

    # Plot with the default width of 60 bars.
    [void] Plot() { $this.Plot(60) }
}

# ============================================================
# DATETIME PARSING
# ============================================================
# TEACHING NOTE: Real data has dates in many formats:
# "2024-01-15" ISO format (best!)
# "15/01/2024" European
# "01/15/2024" American
# "Jan 15 2024" Text format
# Always standardise to ISO format internally.
# ============================================================

function ConvertTo-VBAFDateTime {
    <#
    .SYNOPSIS
        Parses date strings into [datetime] values using exact, culture-invariant formats.
    .PARAMETER dateStrings
        The strings to parse. Each one is trimmed before parsing.
    .PARAMETER Format
        "auto" (default) tries a fixed list of known formats in order and keeps
        the first that matches; any other value is used as the single exact format.
    .OUTPUTS
        One [datetime] per input string, in input order. A string that matches
        no format produces a yellow warning and [datetime]::MinValue.
    .NOTES
        In "auto" mode dd/MM/yyyy is tried before MM/dd/yyyy, so an ambiguous
        string such as "01/02/2024" is read as 1 February.
    #>
    param([string[]]$dateStrings, [string]$Format = "auto")

    $invariant = [System.Globalization.CultureInfo]::InvariantCulture
    $noStyle   = [System.Globalization.DateTimeStyles]::None

    # Either the caller's single format or the built-in candidates, in priority order.
    $patterns = if ($Format -ne "auto") {
        @($Format)
    } else {
        @(
            "yyyy-MM-dd", "yyyy/MM/dd", "dd-MM-yyyy", "dd/MM/yyyy",
            "MM/dd/yyyy", "MM-dd-yyyy", "yyyy-MM-dd HH:mm:ss",
            "dd MMM yyyy", "MMM dd yyyy", "yyyyMMdd"
        )
    }

    $parsedDates = [System.Collections.Generic.List[datetime]]::new()
    foreach ($text in $dateStrings) {
        $trimmed = $text.Trim()
        $hit     = $null

        foreach ($pattern in $patterns) {
            $candidate = [datetime]::MinValue
            if ([datetime]::TryParseExact($trimmed, $pattern, $invariant, $noStyle, [ref]$candidate)) {
                $hit = $candidate
                break
            }
        }

        if ($null -ne $hit) {
            $parsedDates.Add($hit)
            continue
        }

        # Keep the output aligned with the input: failures become MinValue.
        Write-Host "⚠️ Could not parse date: '$text'" -ForegroundColor Yellow
        $parsedDates.Add([datetime]::MinValue)
    }
    return $parsedDates.ToArray()
}

# Extract datetime features as ML-ready numeric columns
function Get-DatetimeFeatures {
    <#
    .SYNOPSIS
        Extracts calendar features from timestamps as ML-ready numeric columns.
    .PARAMETER timestamps
        The timestamps to describe.
    .OUTPUTS
        One hashtable per timestamp with the keys Year, Month, Day, DayOfWeek
        (0=Sunday .. 6=Saturday), DayOfYear, WeekOfYear, Quarter (1..4),
        IsWeekend (1.0/0.0) and the cyclical encodings MonthSin, MonthCos,
        DowSin, DowCos (rounded to 4 decimals).
    #>
    param([datetime[]]$timestamps)

    # TEACHING: Calendar features are VERY useful for ML!
    # The model can learn: "sales spike every Friday"
    # or "temperature drops every January"

    # Invariant (Gregorian) calendar: week numbers must not depend on the
    # culture of the machine that happens to run the script.
    $calendar  = [System.Globalization.CultureInfo]::InvariantCulture.Calendar
    $weekRule  = [System.Globalization.CalendarWeekRule]::FirstDay
    $weekStart = [System.DayOfWeek]::Monday

    $features = [System.Collections.Generic.List[hashtable]]::new()
    foreach ($dt in $timestamps) {
        $dow       = [int]$dt.DayOfWeek
        $isWeekend = if ($dow -eq 0 -or $dow -eq 6) { 1.0 } else { 0.0 }

        # Floor, not an [int] cast: PowerShell's [int] conversion ROUNDS, so
        # (3 - 1) / 3 = 0.67 would become 1 and put March into quarter 2
        # (and December into a nonexistent quarter 5).
        $quarter = [int][Math]::Floor(($dt.Month - 1) / 3.0) + 1

        $features.Add(@{
            Year        = $dt.Year
            Month       = $dt.Month
            Day         = $dt.Day
            DayOfWeek   = $dow   # 0=Sunday, 6=Saturday
            DayOfYear   = $dt.DayOfYear
            WeekOfYear  = $calendar.GetWeekOfYear($dt, $weekRule, $weekStart)
            Quarter     = $quarter
            IsWeekend   = $isWeekend
            # Cyclical encoding: sin/cos transforms preserve circular nature
            # e.g. month 12 and month 1 are CLOSE, not far apart!
            MonthSin    = [Math]::Round([Math]::Sin(2 * [Math]::PI * $dt.Month / 12), 4)
            MonthCos    = [Math]::Round([Math]::Cos(2 * [Math]::PI * $dt.Month / 12), 4)
            DowSin      = [Math]::Round([Math]::Sin(2 * [Math]::PI * $dow / 7), 4)
            DowCos      = [Math]::Round([Math]::Cos(2 * [Math]::PI * $dow / 7), 4)
        })
    }
    return $features.ToArray()
}

# ============================================================
# LAG FEATURES
# ============================================================
# TEACHING NOTE: The most powerful time series feature!
# "What was the value 1 day ago? 7 days ago? 30 days ago?"
# These become features for predicting TODAY's value.
#
# Example with lag=1,2,7:
# Date Value Lag1 Lag2 Lag7
# 2024-01-08 105 103 101 98
# 2024-01-09 107 105 103 100
#
# The model learns: "today ~ yesterday + last week"
# ============================================================

function Add-LagFeatures {
    <#
    .SYNOPSIS
        Builds one row per observation containing the value and its lagged values.
    .PARAMETER Series
        The source time series.
    .PARAMETER Lags
        Offsets (in observations) to look back. For each lag L the row gets a
        key "Lag<L>" holding the value L observations earlier.
    .OUTPUTS
        One hashtable per observation with Timestamp, Value and one Lag<L> key
        per requested lag. A lag whose source index falls outside the series
        is stored as NaN.
    #>
    param(
        [TimeSeries] $Series,
        [int[]]      $Lags = @(1, 2, 7)
    )

    $n         = $Series.Length()
    $rows      = [System.Collections.Generic.List[hashtable]]::new()
    $validRows = 0

    for ($i = 0; $i -lt $n; $i++) {
        $row = @{
            Timestamp = $Series.Timestamps[$i]
            Value     = $Series.Values[$i]
        }

        $complete = $true
        foreach ($lag in $Lags) {
            # Bounds-check both ends so an out-of-range source index always
            # yields NaN (never $null), whatever the sign of the lag.
            $sourceIdx = $i - $lag
            if ($sourceIdx -ge 0 -and $sourceIdx -lt $n) {
                $row["Lag$lag"] = $Series.Values[$sourceIdx]
            } else {
                $row["Lag$lag"] = [double]::NaN
                $complete = $false
            }
        }

        if ($complete) { $validRows++ }
        $rows.Add($row)
    }

    Write-Host "🔁 Lag features added: {$($Lags -join ', ')}" -ForegroundColor Green
    # Counted per row, so it can never go negative when the largest lag
    # exceeds the series length.
    Write-Host (" Valid rows (no NaN): {0}/{1}" -f $validRows, $n) -ForegroundColor Cyan
    return $rows.ToArray()
}

# ============================================================
# ROLLING WINDOW STATISTICS
# ============================================================
# TEACHING NOTE: A rolling window computes statistics over
# the last N observations.
#
# Rolling mean (window=7):
# smooths out noise, reveals the trend
# "average of last 7 days"
#
# Rolling std:
# measures volatility/uncertainty
# "how much did the last 7 days vary?"
#
# Rolling min/max:
# "what was the worst/best of the last 7 days?"
# ============================================================

function Add-RollingFeatures {
    <#
    .SYNOPSIS
        Builds one row per observation with trailing rolling-window statistics.
    .PARAMETER Series
        The source time series.
    .PARAMETER Windows
        Window sizes in observations. Each window covers the current point and
        the points before it; near the start it is shortened to what exists.
        Sizes below 1 are skipped with a warning.
    .PARAMETER Stats
        Statistics per window: "mean", "std" (sample std, 0 for a single
        point), "min", "max", "sum". Unknown names are skipped with a warning.
    .OUTPUTS
        One hashtable per observation with Timestamp, Value and one
        "Roll<window>_<stat>" key per window/stat pair, rounded to 4 decimals.
    #>
    param(
        [TimeSeries] $Series,
        [int[]]      $Windows  = @(7, 14, 30),
        [string[]]   $Stats    = @("mean", "std", "min", "max")
    )

    $supported = @("mean", "std", "min", "max", "sum")

    # Validate once up front. An unknown stat would otherwise reach
    # [Math]::Round with no value, and a window below 1 would build a
    # descending index range that reads the wrong elements.
    $activeStats = @()
    foreach ($stat in $Stats) {
        if ($supported -contains $stat) {
            $activeStats += $stat
        } else {
            Write-Host ("⚠️ Skipping unknown rolling stat: '{0}' (available: {1})" -f $stat, ($supported -join ", ")) -ForegroundColor Yellow
        }
    }
    $activeWindows = @()
    foreach ($w in $Windows) {
        if ($w -ge 1) {
            $activeWindows += $w
        } else {
            Write-Host ("⚠️ Skipping invalid rolling window: {0} (must be >= 1)" -f $w) -ForegroundColor Yellow
        }
    }

    $n    = $Series.Length()
    $rows = [System.Collections.Generic.List[hashtable]]::new()

    for ($i = 0; $i -lt $n; $i++) {
        $row = @{
            Timestamp = $Series.Timestamps[$i]
            Value     = $Series.Values[$i]
        }

        foreach ($w in $activeWindows) {
            $startIdx = [Math]::Max(0, $i - $w + 1)
            # @() keeps a one-element slice an array instead of a scalar.
            $window   = @($Series.Values[$startIdx..$i])
            # One measurement pass per window, shared by every requested stat.
            $measure  = $window | Measure-Object -Average -Sum -Minimum -Maximum

            foreach ($stat in $activeStats) {
                $val = switch ($stat) {
                    "mean" { $measure.Average }
                    "std"  {
                        $sumSq = 0.0
                        foreach ($v in $window) {
                            $delta  = $v - $measure.Average
                            $sumSq += $delta * $delta
                        }
                        if ($window.Length -gt 1) { [Math]::Sqrt($sumSq / ($window.Length - 1)) } else { 0.0 }
                    }
                    "min"  { $measure.Minimum }
                    "max"  { $measure.Maximum }
                    "sum"  { $measure.Sum }
                }
                $row["Roll${w}_${stat}"] = [Math]::Round($val, 4)
            }
        }
        $rows.Add($row)
    }

    Write-Host "🪟 Rolling features added:" -ForegroundColor Green
    foreach ($w in $activeWindows) {
        Write-Host (" Window {0,2}: {1}" -f $w, ($activeStats -join ", ")) -ForegroundColor Cyan
    }
    return $rows.ToArray()
}

# ============================================================
# SEASONAL DECOMPOSITION
# ============================================================
# TEACHING NOTE: Any time series can be decomposed into:
# Value = Trend + Seasonal + Residual
#
# Trend : the long-term direction (linear or smooth)
# Seasonal : repeating pattern with fixed period
# (period=7 for weekly, period=12 for monthly)
# Residual : what's left after removing trend and seasonal
# (noise, anomalies, unexplained variation)
#
# We use a simple approach:
# Trend = centered moving average (window=period)
# Seasonal = average deviation from trend per period position
# Residual = Value - Trend - Seasonal
# ============================================================

function Invoke-SeasonalDecomposition {
    <#
    .SYNOPSIS
        Additive decomposition of a series into Trend + Seasonal + Residual.
    .DESCRIPTION
        Trend    : centered moving average spanning exactly one period.
                   For an even period the two end points get half weight
                   (the classical "2 x m" moving average) so the window stays
                   centered on the current point.
        Seasonal : mean detrended value per position within the period,
                   using only points where the trend is defined.
        Residual : Value - Trend - Seasonal.
        Prints a short summary and plots the three components.
    .PARAMETER Series
        The series to decompose.
    .PARAMETER Period
        Season length in observations (7=weekly, 12=monthly, 4=quarterly).
        Must be at least 1.
    .OUTPUTS
        Hashtable with Trend, Seasonal, Residual (double arrays as long as the
        series) and Period. Trend and Residual are NaN for the first and last
        floor(Period / 2) points, where no centered window fits.
    #>
    param(
        [TimeSeries] $Series,
        [int]        $Period = 7    # 7=weekly, 12=monthly, 4=quarterly
    )

    if ($Period -lt 1) {
        throw "Invoke-SeasonalDecomposition: Period must be >= 1 (got $Period)."
    }

    $n      = $Series.Length()
    $values = $Series.Values

    # Step 1: Trend via centered moving average.
    # Floor, not an [int] cast: the cast ROUNDS, so 7 / 2 = 3.5 would become 4
    # and average 9 points instead of 7, leaking weekly seasonality into the trend.
    $half   = [int][Math]::Floor($Period / 2.0)
    $isEven = ($Period % 2) -eq 0

    $trend = [double[]]::new($n)
    for ($i = 0; $i -lt $n; $i++) { $trend[$i] = [double]::NaN }

    for ($i = $half; $i -lt ($n - $half); $i++) {
        $lo  = $i - $half
        $hi  = $i + $half
        $acc = 0.0
        for ($j = $lo; $j -le $hi; $j++) { $acc += $values[$j] }
        if ($isEven) {
            # Period + 1 points were summed; halve both ends so the weights
            # add up to exactly one period.
            $acc -= 0.5 * ($values[$lo] + $values[$hi])
        }
        $trend[$i] = $acc / $Period
    }

    # Steps 2 + 3: Seasonal = average detrended value per period position.
    $seasonal = [double[]]::new($n)
    for ($p = 0; $p -lt $Period; $p++) {
        $sum   = 0.0
        $count = 0
        for ($i = $p; $i -lt $n; $i += $Period) {
            if (-not [double]::IsNaN($trend[$i])) {
                $sum += $values[$i] - $trend[$i]
                $count++
            }
        }
        $avgSeasonal = if ($count -gt 0) { $sum / $count } else { 0.0 }

        for ($i = $p; $i -lt $n; $i += $Period) {
            $seasonal[$i] = $avgSeasonal
        }
    }

    # Step 4: Residual = Value - Trend - Seasonal.
    # Where the trend is undefined the residual is undefined too. Treating the
    # trend as 0 there would put the full series level into the residual and
    # inflate its spread.
    $residual   = [double[]]::new($n)
    $validCount = 0
    $resSum     = 0.0
    for ($i = 0; $i -lt $n; $i++) {
        if ([double]::IsNaN($trend[$i])) {
            $residual[$i] = [double]::NaN
        } else {
            $residual[$i] = $values[$i] - $trend[$i] - $seasonal[$i]
            $resSum += $residual[$i]
            $validCount++
        }
    }

    # Summary statistics (population std over the defined residuals only).
    $seasonalStats = $seasonal | Measure-Object -Minimum -Maximum
    $seasonalAmp   = $seasonalStats.Maximum - $seasonalStats.Minimum
    $residualStd   = [double]::NaN
    if ($validCount -gt 0) {
        $resMean = $resSum / $validCount
        $sumSq   = 0.0
        foreach ($r in $residual) {
            if (-not [double]::IsNaN($r)) { $sumSq += ($r - $resMean) * ($r - $resMean) }
        }
        $residualStd = [Math]::Sqrt($sumSq / $validCount)
    }

    Write-Host ""
    Write-Host "🔬 Seasonal Decomposition" -ForegroundColor Green
    Write-Host (" Period : {0}" -f $Period)              -ForegroundColor Cyan
    Write-Host (" Seasonal ampl. : {0:F2}" -f $seasonalAmp) -ForegroundColor White
    Write-Host (" Residual std : {0:F2}" -f $residualStd)   -ForegroundColor White
    if ($validCount -eq 0 -and $n -gt 0) {
        Write-Host " ⚠️ Series is too short for this period - no trend could be computed" -ForegroundColor Yellow
    }

    # Plot each component (undefined edge points are drawn as 0).
    $trendPlot    = [double[]]::new($n)
    $residualPlot = [double[]]::new($n)
    for ($i = 0; $i -lt $n; $i++) {
        if (-not [double]::IsNaN($trend[$i])) {
            $trendPlot[$i]    = $trend[$i]
            $residualPlot[$i] = $residual[$i]
        }
    }
    $trendTS    = [TimeSeries]::new($Series.Timestamps, $trendPlot, "Trend")
    $seasonalTS = [TimeSeries]::new($Series.Timestamps, $seasonal, "Seasonal")
    $residualTS = [TimeSeries]::new($Series.Timestamps, $residualPlot, "Residual")

    $trendTS.Plot(50)
    $seasonalTS.Plot(50)
    $residualTS.Plot(50)

    return @{
        Trend    = $trend
        Seasonal = $seasonal
        Residual = $residual
        Period   = $Period
    }
}

# ============================================================
# RESAMPLING
# ============================================================
# TEACHING NOTE: Resampling changes the frequency of data.
# Downsampling: daily -> weekly -> monthly (aggregate)
# Upsampling : monthly -> daily (interpolate - not covered here)
#
# When aggregating, choose the right statistic:
# Sales -> SUM (total sold per week)
# Price -> MEAN (average price per week)
# Rainfall -> SUM (total rain per week)
# Temp -> MEAN (average temp per week)
# ============================================================

function Invoke-TimeSeriesResample {
    <#
    .SYNOPSIS
        Downsamples a series by grouping points into calendar buckets and
        aggregating each bucket to a single value.
    .PARAMETER Series
        The series to resample.
    .PARAMETER Frequency
        "weekly" (weeks start on Monday, week 1 starts on 1 January),
        "monthly" or "quarterly". Any other value is bucketed by month.
    .PARAMETER Aggregation
        "mean", "sum", "min", "max" or "last" (last value seen in the bucket).
    .OUTPUTS
        A new TimeSeries named "<Name>_<Frequency>" with one point per bucket,
        stamped with the first timestamp seen in that bucket and rounded to
        4 decimals. Returns $null (after printing an error) when Aggregation
        is not one of the supported names.
    #>
    param(
        [TimeSeries] $Series,
        [string]     $Frequency = "weekly",   # "weekly", "monthly", "quarterly"
        [string]     $Aggregation = "mean"    # "mean", "sum", "min", "max", "last"
    )

    # Reject unknown aggregations up front; otherwise no switch branch matches
    # and every bucket would be aggregated to "nothing".
    $knownAggregations = @("mean", "sum", "min", "max", "last")
    if ($knownAggregations -notcontains $Aggregation) {
        Write-Host "❌ Unknown aggregation: $Aggregation" -ForegroundColor Red
        Write-Host (" Available: {0}" -f ($knownAggregations -join ", ")) -ForegroundColor Yellow
        return $null
    }

    # Invariant (Gregorian) calendar so week keys do not depend on the
    # culture of the machine running the script.
    $calendar  = [System.Globalization.CultureInfo]::InvariantCulture.Calendar
    $weekRule  = [System.Globalization.CalendarWeekRule]::FirstDay
    $weekStart = [System.DayOfWeek]::Monday

    $n       = $Series.Length()
    $buckets = @{}

    for ($i = 0; $i -lt $n; $i++) {
        $dt  = $Series.Timestamps[$i]
        $key = switch ($Frequency) {
            "weekly"    { "{0}-W{1:D2}" -f $dt.Year, $calendar.GetWeekOfYear($dt, $weekRule, $weekStart) }
            "monthly"   { "{0}-{1:D2}" -f $dt.Year, $dt.Month }
            # Floor, not an [int] cast: the cast ROUNDS, which would push
            # months 3, 6, 9 and 12 into the following quarter ("Q5" for December).
            "quarterly" { "{0}-Q{1}"   -f $dt.Year, ([int][Math]::Floor(($dt.Month - 1) / 3.0) + 1) }
            default     { "{0}-{1:D2}" -f $dt.Year, $dt.Month }
        }

        if (-not $buckets.ContainsKey($key)) {
            $buckets[$key] = @{
                Values    = [System.Collections.Generic.List[double]]::new()
                FirstDate = $dt
            }
        }
        $buckets[$key].Values.Add($Series.Values[$i])
    }

    # Aggregate each bucket; keys are zero-padded so string order is
    # chronological order.
    $newTimestamps = [System.Collections.Generic.List[datetime]]::new()
    $newValues     = [System.Collections.Generic.List[double]]::new()

    foreach ($key in ($buckets.Keys | Sort-Object)) {
        $bucket = $buckets[$key]
        $vals   = $bucket.Values
        $agg    = switch ($Aggregation) {
            "mean" { ($vals | Measure-Object -Average).Average }
            "sum"  { ($vals | Measure-Object -Sum).Sum }
            "min"  { ($vals | Measure-Object -Minimum).Minimum }
            "max"  { ($vals | Measure-Object -Maximum).Maximum }
            "last" { $vals[$vals.Count - 1] }
        }
        $newTimestamps.Add($bucket.FirstDate)
        $newValues.Add([Math]::Round($agg, 4))
    }

    $resampled = [TimeSeries]::new($newTimestamps.ToArray(), $newValues.ToArray(), "$($Series.Name)_$Frequency")
    $resampled.Frequency = $Frequency

    Write-Host "📅 Resampled: $($Series.Name) -> $Frequency ($Aggregation)" -ForegroundColor Green
    Write-Host (" Before: {0} points" -f $n)                             -ForegroundColor Cyan
    Write-Host (" After : {0} points" -f $resampled.Length())            -ForegroundColor Cyan
    return $resampled
}

# ============================================================
# BUILT-IN DATASETS
# ============================================================

function Get-VBAFTimeSeriesDataset {
    <#
    .SYNOPSIS
        Generates one of the built-in synthetic daily datasets.
    .PARAMETER Name
        "Sales"       : 365 days from 2023-01-01 with an upward trend, a weekly
                        pattern (lower weekends), a December bump and noise.
        "Temperature" : 730 days from 2022-01-01 with an annual cosine cycle
                        around 10 degrees and noise.
    .OUTPUTS
        A TimeSeries, or $null (after printing the available names) when the
        dataset name is unknown. The random generator is seeded with 42, so
        each dataset is identical on every call.
    #>
    param([string]$Name = "Sales")

    $random = [System.Random]::new(42)

    if ($Name -eq "Sales") {
        Write-Host "📊 Dataset: Daily Sales (365 days)" -ForegroundColor Cyan
        Write-Host " Has: weekly seasonality, upward trend, noise" -ForegroundColor Cyan

        $dayCount = 365
        $firstDay = [datetime]"2023-01-01"
        $dates    = [datetime[]]::new($dayCount)
        $sales    = [double[]]::new($dayCount)

        foreach ($offset in 0..($dayCount - 1)) {
            $day     = $firstDay.AddDays($offset)
            $weekday = [int]$day.DayOfWeek

            # Slowly rising base level.
            $level = 100 + $offset * 0.1

            # Weekends sell less; weekdays climb towards Friday.
            if ($weekday -eq 0 -or $weekday -eq 6) {
                $weekly = -20
            } else {
                $weekly = 10 + $weekday * 2
            }

            # Flat uplift throughout December.
            if ($day.Month -eq 12) { $december = 30 } else { $december = 0 }

            # Uniform noise in [-10, 10).
            $jitter = ($random.NextDouble() - 0.5) * 20

            $total          = [Math]::Max(0, $level + $weekly + $december + $jitter)
            $dates[$offset] = $day
            $sales[$offset] = [Math]::Round($total, 1)
        }
        return [TimeSeries]::new($dates, $sales, "DailySales")
    }
    elseif ($Name -eq "Temperature") {
        Write-Host "📊 Dataset: Daily Temperature (2 years)" -ForegroundColor Cyan
        Write-Host " Has: annual seasonality, random weather noise" -ForegroundColor Cyan

        $dayCount = 730
        $firstDay = [datetime]"2022-01-01"
        $dates    = [datetime[]]::new($dayCount)
        $temps    = [double[]]::new($dayCount)
        $yearMean = 10  # mean annual temp

        foreach ($offset in 0..($dayCount - 1)) {
            $day = $firstDay.AddDays($offset)

            # Annual cycle: coldest around New Year, warmest mid-year.
            $cycle  = -15 * [Math]::Cos(2 * [Math]::PI * $day.DayOfYear / 365)
            # Uniform noise in [-4, 4).
            $jitter = ($random.NextDouble() - 0.5) * 8

            $dates[$offset] = $day
            $temps[$offset] = [Math]::Round($yearMean + $cycle + $jitter, 1)
        }
        return [TimeSeries]::new($dates, $temps, "DailyTemperature")
    }
    else {
        Write-Host "❌ Unknown dataset: $Name" -ForegroundColor Red
        Write-Host " Available: Sales, Temperature" -ForegroundColor Yellow
        return $null
    }
}

# ============================================================
# TEST
# 1. Run VBAF.LoadAll.ps1
#
# --- Basic time series ---
# 2. $ts = Get-VBAFTimeSeriesDataset -Name "Sales"
# $ts.PrintSummary()
# $ts.Plot()
#
# --- Datetime features ---
# 3. $dtFeatures = Get-DatetimeFeatures -timestamps $ts.Timestamps
# $dtFeatures[0] # see all features for first day
#
# --- Lag features ---
# 4. $lagged = Add-LagFeatures -Series $ts -Lags @(1, 7, 14)
# $lagged[14] # first row with all lags valid
#
# --- Rolling windows ---
# 5. $rolled = Add-RollingFeatures -Series $ts -Windows @(7, 30) -Stats @("mean","std")
# $rolled[30] # see rolling features
#
# --- Seasonal decomposition ---
# 6. $decomp = Invoke-SeasonalDecomposition -Series $ts -Period 7
# Write-Host "Trend range: $([Math]::Round(($decomp.Trend | Where-Object {-not [double]::IsNaN($_)} | Measure-Object -Minimum).Minimum,1)) to $([Math]::Round(($decomp.Trend | Where-Object {-not [double]::IsNaN($_)} | Measure-Object -Maximum).Maximum,1))"
#
# --- Resampling ---
# 7. $weekly = Invoke-TimeSeriesResample -Series $ts -Frequency "weekly" -Aggregation "sum"
# $monthly = Invoke-TimeSeriesResample -Series $ts -Frequency "monthly" -Aggregation "mean"
# $weekly.Plot()
# $monthly.Plot()
#
# --- Temperature dataset ---
# 8. $temp = Get-VBAFTimeSeriesDataset -Name "Temperature"
# $decomp2 = Invoke-SeasonalDecomposition -Series $temp -Period 365
# $annual = Invoke-TimeSeriesResample -Series $temp -Frequency "monthly" -Aggregation "mean"
# $annual.Plot()
# ============================================================
# Load banner: lists what this file defines and a quick-start snippet.
# Runs in a child scope (& { ... }) so the helper variables do not leak into
# the caller's scope when the file is dot-sourced.
& {
    $banner = @(
        @{ Text = "📦 VBAF.ML.TimeSeries.ps1 loaded";      Color = "Green" }
        @{ Text = " Classes : TimeSeries";                 Color = "Cyan" }
        @{ Text = " Functions : ConvertTo-VBAFDateTime";   Color = "Cyan" }
        @{ Text = " Get-DatetimeFeatures";                 Color = "Cyan" }
        @{ Text = " Add-LagFeatures";                      Color = "Cyan" }
        @{ Text = " Add-RollingFeatures";                  Color = "Cyan" }
        @{ Text = " Invoke-SeasonalDecomposition";         Color = "Cyan" }
        @{ Text = " Invoke-TimeSeriesResample";            Color = "Cyan" }
        @{ Text = " Get-VBAFTimeSeriesDataset";            Color = "Cyan" }
        @{ Text = "";                                      Color = $null }
        @{ Text = " Quick start:";                         Color = "Yellow" }
        # Single quotes: the snippet must be shown literally, not expanded.
        @{ Text = ' $ts = Get-VBAFTimeSeriesDataset -Name "Sales"'; Color = "White" }
        @{ Text = ' $ts.PrintSummary()';                   Color = "White" }
        @{ Text = ' $ts.Plot()';                           Color = "White" }
        @{ Text = "";                                      Color = $null }
    )

    foreach ($entry in $banner) {
        if ($null -eq $entry.Color) {
            Write-Host $entry.Text
        } else {
            Write-Host $entry.Text -ForegroundColor $entry.Color
        }
    }
}