Public/Import-VBDNSLog.ps1
|
# ============================================================ # FUNCTION : Import-VBDNSLog # VERSION : 1.0.0 # CHANGED : 2026-05-07 -- Initial build # AUTHOR : Vibhu Bhatnagar # PURPOSE : Parse DNS debug log files and populate the SQLite database # ENCODING : UTF-8 with BOM # ============================================================ <# .SYNOPSIS Parses one or more Windows DNS debug log files and stores all PACKET records in the DNSLogDB SQLite database. .DESCRIPTION The main workhorse of the DNSLogDB module. Orchestrates the full import pipeline: 1. Resolves -InputPath to a list of .log files (single file, wildcard, or directory) 2. For each file, calls Get-VBImportStatus to check SHA256 hash vs ImportLog -- files already imported are skipped unless -Force is specified 3. Uses ForEach-Object -Parallel (PS7) to process multiple files concurrently 4. Per thread: Invoke-VBDNSLogParser reads and parses the file into an object[] buffer 5. Per thread: Invoke-VBBulkInsert writes the buffer to SQLite using a prepared statement inside a batched transaction 6. Returns one summary object per file The database must be initialised with Initialize-VBDNSLogDatabase before calling this function. Each parallel thread creates its own SQLiteConnection (WAL mode supports concurrent writes). .PARAMETER InputPath Path to a single .log file, a directory containing .log files, or a wildcard pattern (e.g. 'C:\DNS\Logs\*.log'). .PARAMETER DatabasePath Full path to the SQLite .db file. Must be initialised with Initialize-VBDNSLogDatabase. .PARAMETER Recurse Search subdirectories for .log files when -InputPath is a directory. .PARAMETER ThrottleLimit Maximum number of parallel threads for ForEach-Object -Parallel. Default: 8. Reduce if memory pressure is observed on large files. .PARAMETER ExcludePrivateIPs Skip PACKET lines where the client IP is classified as private at parse time. This is a storage decision -- private IP records are never written to the database. .PARAMETER Force Re-import files even if their SHA256 hash already exists in ImportLog. The existing ImportLog row is deleted before re-importing. .EXAMPLE Import-VBDNSLog -InputPath 'C:\DNS\Logs\dns_20260501.log' -DatabasePath 'C:\DNS\dns_analysis.db' .EXAMPLE Import-VBDNSLog -InputPath 'C:\DNS\Logs\' -DatabasePath 'C:\DNS\dns_analysis.db' -ThrottleLimit 4 .EXAMPLE Import-VBDNSLog -InputPath 'C:\DNS\Logs\' -DatabasePath 'C:\DNS\dns_analysis.db' -Recurse -ExcludePrivateIPs .EXAMPLE Import-VBDNSLog -InputPath 'C:\DNS\Logs\dns.log' -DatabasePath 'C:\DNS\dns_analysis.db' -Force -Verbose .OUTPUTS [PSCustomObject] One object per processed file: FileName -- File name (leaf only) FilePath -- Full path RecordCount -- Rows inserted into DNSLog ErrorCount -- Lines that failed to parse DurationSec -- Total parse + insert time Status -- 'Success', 'Skipped', or 'Failed' Message -- Additional detail (skip reason or error message) .NOTES Version : 1.0.0 Author : Vibhu Bhatnagar Modified : 2026-05-07 Category : Public Requires: PowerShell 7.0+ (ForEach-Object -Parallel) Requires: PSSQLite module (Install-Module PSSQLite) Run after: Initialize-VBDNSLogDatabase #> function Import-VBDNSLog { [CmdletBinding()] param( [Parameter(Mandatory = $true)] [string]$InputPath, [Parameter(Mandatory = $true)] [string]$DatabasePath, [switch]$Recurse, [int]$ThrottleLimit = 8, [switch]$ExcludePrivateIPs, [switch]$Force ) # Step 1 -- Validate database exists if (-not (Test-Path $DatabasePath)) { throw "Import-VBDNSLog: Database not found at '$DatabasePath'. Run Initialize-VBDNSLogDatabase first." } # Step 2 -- Resolve input path to list of .log files $logFiles = @() if (Test-Path $InputPath -PathType Container) { $gciParams = @{ Path = $InputPath; Filter = '*.log'; File = $true } if ($Recurse) { $gciParams.Recurse = $true } $logFiles = Get-ChildItem @gciParams | Select-Object -ExpandProperty FullName } elseif ($InputPath -match '[*?]') { $logFiles = Get-ChildItem -Path $InputPath -File | Select-Object -ExpandProperty FullName } else { if (-not (Test-Path $InputPath -PathType Leaf)) { throw "Import-VBDNSLog: File not found: '$InputPath'" } $logFiles = @($InputPath) } if ($logFiles.Count -eq 0) { Write-Warning "Import-VBDNSLog: No .log files found at '$InputPath'" return } Write-Verbose "Import-VBDNSLog: Found $($logFiles.Count) file(s) to process" # Step 3 -- Pre-check import status for all files (sequential, before parallel block) $fileStatuses = @{} foreach ($file in $logFiles) { $status = Get-VBImportStatus -FilePath $file -DatabasePath $DatabasePath $fileStatuses[$file] = $status if ($status.AlreadyImported -and -not $Force) { Write-Verbose "Import-VBDNSLog: Skipping '$file' (already imported on $($status.ImportedAt))" } elseif ($status.AlreadyImported -and $Force) { Invoke-SqliteQuery -DataSource $DatabasePath ` -Query "DELETE FROM ImportLog WHERE FileHash = @hash" ` -SqlParameters @{ hash = $status.FileHash } Write-Verbose "Import-VBDNSLog: Force re-import -- removed existing ImportLog entry for '$file'" } } # Step 4 -- Filter to files that need processing $filesToProcess = $logFiles | Where-Object { -not $fileStatuses[$_].AlreadyImported -or $Force } # Emit skipped file results $logFiles | Where-Object { $fileStatuses[$_].AlreadyImported -and -not $Force } | ForEach-Object { [PSCustomObject]@{ FileName = Split-Path $_ -Leaf FilePath = $_ RecordCount = 0 ErrorCount = 0 DurationSec = 0 Status = 'Skipped' Message = "Already imported on $($fileStatuses[$_].ImportedAt)" } } if ($filesToProcess.Count -eq 0) { Write-Verbose "Import-VBDNSLog: No new files to import." return } Write-Verbose "Import-VBDNSLog: Processing $($filesToProcess.Count) file(s) with ThrottleLimit=$ThrottleLimit" # Step 5 -- Parallel parse and insert (one thread per file) # PS7 parallel runspaces do not inherit module functions -- pass the Private folder # path via $using: and dot-source the required functions inside each thread. # # Progress strategy: Write-Progress inside ForEach-Object -Parallel is NOT forwarded # to the host console. Instead each thread writes counters into a shared # ConcurrentDictionary, and the main thread polls that dictionary in a tight loop # driving all three progress bars (Read -> Parse -> Insert) itself. $excludePrivate = $ExcludePrivateIPs.IsPresent $dbPath = $DatabasePath $hashLookup = $fileStatuses $privateDir = Join-Path $PSScriptRoot '..\Private' # One shared state dict per file being processed. Keyed by file name (leaf). # Parallel threads write; the polling loop below reads. $progressStates = [System.Collections.Concurrent.ConcurrentDictionary[string, System.Collections.Concurrent.ConcurrentDictionary[string,object]]]::new() foreach ($f in $filesToProcess) { $state = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new() $state['Stage'] = 'Queued' # Queued | Reading | Parsing | Inserting | Done | Failed $state['FileName'] = (Split-Path $f -Leaf) $state['FileSizeBytes'] = [long](Get-Item $f).Length $state['ParseTotalBytes'] = [long]0 $state['ParseBytesRead'] = [long]0 $state['ParseLinesRead'] = [long]0 $state['ParsePacketCount'] = [long]0 $state['ParseDone'] = $false $state['InsertTotalRows'] = [long]0 $state['InsertRowsInserted'] = [long]0 $state['InsertDone'] = $false $progressStates[(Split-Path $f -Leaf)] = $state } # Launch the parallel pipeline as a background job so the main thread is free to poll $parallelJob = $filesToProcess | ForEach-Object -AsJob -ThrottleLimit $ThrottleLimit -Parallel { $filePath = $_ $dbPath = $using:dbPath $excludePrivate = $using:excludePrivate $hashLookup = $using:hashLookup $privateDir = $using:privateDir $progressStates = $using:progressStates # Dot-source Private functions into this runspace -- required for PS7 parallel threads Get-ChildItem -Path $privateDir -Filter '*.ps1' | ForEach-Object { . $_.FullName } $fileName = Split-Path $filePath -Leaf $fileHash = $hashLookup[$filePath].FileHash $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() $recordCount = 0 $errorCount = 0 $state = $progressStates[$fileName] try { if ($null -ne $state) { $state['Stage'] = 'Parsing' } $parseParams = @{ FilePath = $filePath ProgressState = $state } if ($excludePrivate) { $parseParams.ExcludePrivateIPs = $true } $parseResult = Invoke-VBDNSLogParser @parseParams $buffer = $parseResult.Buffer # List[object[]] -- never piped through PS pipeline $errorCount = $parseResult.ErrorCount $recordCount = $buffer.Count if ($recordCount -gt 0) { if ($null -ne $state) { $state['Stage'] = 'Inserting' } $duration = $stopwatch.Elapsed.TotalSeconds Invoke-VBBulkInsert ` -DatabasePath $dbPath ` -Buffer $buffer ` -SourceFile $filePath ` -RecordCount $recordCount ` -ErrorCount $errorCount ` -FileHash $fileHash ` -DurationSeconds $duration ` -ProgressState $state | Out-Null } $stopwatch.Stop() if ($null -ne $state) { $state['Stage'] = 'Done' } [PSCustomObject]@{ FileName = $fileName FilePath = $filePath RecordCount = $recordCount ErrorCount = $errorCount DurationSec = [math]::Round($stopwatch.Elapsed.TotalSeconds, 1) Status = 'Success' Message = '' } } catch { $stopwatch.Stop() if ($null -ne $state) { $state['Stage'] = 'Failed' } [PSCustomObject]@{ FileName = $fileName FilePath = $filePath RecordCount = $recordCount ErrorCount = $errorCount DurationSec = [math]::Round($stopwatch.Elapsed.TotalSeconds, 1) Status = 'Failed' Message = $_.Exception.Message } } } # Step 6 -- Progress polling loop on the main thread. # Runs until the background job finishes, updating 3-stage progress bars every 200ms. # Bar IDs: 1 = per-file parent, 2 = Parse child, 3 = Insert child. $totalFiles = $filesToProcess.Count $fileIndex = 0 while ($parallelJob.State -in 'NotStarted','Running') { $fileIndex = 0 foreach ($kvp in $progressStates.GetEnumerator()) { $s = $kvp.Value $fileName = $s['FileName'] $stage = $s['Stage'] $fileIndex++ $fileLabel = "[$fileIndex/$totalFiles] $fileName" switch ($stage) { 'Queued' { Write-Progress -Id 1 -Activity "Import-VBDNSLog" ` -Status "Queued: $fileLabel" -PercentComplete 0 } 'Parsing' { $totalBytes = [long]$s['ParseTotalBytes'] $bytesRead = [long]$s['ParseBytesRead'] $linesRead = [long]$s['ParseLinesRead'] $packetCount = [long]$s['ParsePacketCount'] $pct = if ($totalBytes -gt 0) { [math]::Min(99, [math]::Round($bytesRead / $totalBytes * 100)) } else { 0 } Write-Progress -Id 1 -Activity "Import-VBDNSLog" ` -Status "Parsing: $fileLabel" -PercentComplete $pct Write-Progress -Id 2 -ParentId 1 -Activity "Parsing log file" ` -Status ("Lines: {0:N0} | PACKET rows: {1:N0} | {2}%" -f $linesRead, $packetCount, $pct) ` -PercentComplete $pct Write-Progress -Id 3 -ParentId 1 -Activity "Populating database" ` -Status "Waiting for parse to complete..." -PercentComplete 0 } 'Inserting' { $totalRows = [long]$s['InsertTotalRows'] $rowsInserted = [long]$s['InsertRowsInserted'] $pct = if ($totalRows -gt 0) { [math]::Min(99, [math]::Round($rowsInserted / $totalRows * 100)) } else { 0 } $packetCount = [long]$s['ParsePacketCount'] Write-Progress -Id 1 -Activity "Import-VBDNSLog" ` -Status "Inserting: $fileLabel" -PercentComplete (50 + [math]::Round($pct / 2)) Write-Progress -Id 2 -ParentId 1 -Activity "Parsing log file" ` -Status ("Complete | {0:N0} PACKET rows parsed" -f $packetCount) ` -PercentComplete 100 Write-Progress -Id 3 -ParentId 1 -Activity "Populating database" ` -Status ("{0:N0} of {1:N0} rows inserted ({2}%)" -f $rowsInserted, $totalRows, $pct) ` -PercentComplete $pct } 'Done' { Write-Progress -Id 3 -ParentId 1 -Activity "Populating database" -Completed Write-Progress -Id 2 -ParentId 1 -Activity "Parsing log file" -Completed Write-Progress -Id 1 -Activity "Import-VBDNSLog" ` -Status "Done: $fileLabel" -PercentComplete 100 } 'Failed' { Write-Progress -Id 3 -ParentId 1 -Activity "Populating database" -Completed Write-Progress -Id 2 -ParentId 1 -Activity "Parsing log file" -Completed Write-Progress -Id 1 -Activity "Import-VBDNSLog" ` -Status "Failed: $fileLabel" -PercentComplete 100 } } } Start-Sleep -Milliseconds 200 } # Clear all progress bars Write-Progress -Id 3 -Activity "Populating database" -Completed Write-Progress -Id 2 -Activity "Parsing log file" -Completed Write-Progress -Id 1 -Activity "Import-VBDNSLog" -Completed # Collect and emit results from the completed job Receive-Job -Job $parallelJob Remove-Job -Job $parallelJob } |