# StableTablePS.psm1
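
# A minimal usage sketch (hypothetical path; only the function and class below are defined by this module):
#   Import-Module ./StableTablePS.psm1
#   $table = New-StableTable -Path ./demo.stbl
#   $table.Set('greeting', @{ text = 'hello'; count = 1 })   # appends "greeting=<note>=<json>"
#   $table.UpdateIndex()                                      # Set() does not refresh the in-memory index
#   $table.Get('greeting')                                    # seeks straight to the JSON value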

function New-StableTable {
    [CmdletBinding()]
    param (
        # Where to store the table
        $Path,

        [switch]$SkipIndexing # InMemoryIndex
    )

    $Path = Resolve-Path_Force $Path
    $table = [StableTable]@{ Path = $Path }
    if (-not $PSBoundParameters.SkipIndexing -and (Test-Path $Path)) {
        $table.UpdateIndex()
    }
    $table
}

class StableTable {
    [string]$Path
    [System.Text.Encoding]$Encoding = [System.Text.Encoding]::UTF8
    # PowerShell converts the string through the DataTable(string tableName) constructor
    [System.Data.DataTable]$WhereGroup = ''
    [regex[]]$CaptureGroup
    [hashtable]$Index = @{}
    # [scriptblock]$Notes = { [System.DateTimeOffset]::Now.ToString() }
    [scriptblock]$Notes = { Get-Date -f d }
    [int]$JsonDepth = 5
    hidden [int]$BomLength

    StableTable() {}
    StableTable([switch]$NoIndex) {}

    # $this.UpdateIndex()
    # $this.IndexPosition as default position
    # $this.WhereGroup.TableName as last position
    # $this.UpdateIndex([bigint]$Position)
    # move most of Create into UpdateIndex as position 0
    # $this.CleanStore() to delete old duplicates

    # (Re)create the in-memory index: one row per key, holding the byte position of its JSON value
    CreateIndex() {
        $this.WhereGroup = ''
        $this.WhereGroup.TableName = 0
        $this.WhereGroup.Columns.AddRange(('name', 'position'))
        $this.WhereGroup.PrimaryKey = $this.WhereGroup.Columns[0]
        if ($this.CaptureGroup) {
            $columns = $this.CaptureGroup.GetGroupNames() |
                Where-Object { $_ -ne 0 } |
                Select-Object -Unique
            $this.WhereGroup.Columns.AddRange($columns)
        }
    }

    UpdateIndex() {
        $this.CreateIndex()
        $this.UpdateIndex($this.WhereGroup.TableName)
    }
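
    # Each record in the backing file is one line of the form
    #   <name>=<note>=<json>
    # e.g. (hypothetical): greeting=01/01/2024={"text":"hello","count":1}
    # UpdateIndex below parses those lines and records, per key, the byte offset of
    # the JSON portion in the 'position' column of $this.WhereGroup.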

    UpdateIndex([bigint]$position) {
        $regex = "^(?<key>[^=]*)=(?<notes>[^=]*)="
        if (-not $this.WhereGroup.Rows.Count) { $this.CreateIndex() }
        $sr = $this.OpenReader()
        if ($sr.Peek() -eq -1) {
            $sr.Dispose()
            return
        }
        $this.GetBom($sr)
        if ($position -eq 0) { $position += $this.BomLength }

        #region CRLF
        # Determine whether the file has CRLF or just LF line endings
        # Maybe you can use something from $sr.BaseStream.Position to get rid of this stuff.
        $sr.BaseStream.Seek($this.BomLength, "Begin")
        $sr.DiscardBufferedData()
        $line = $sr.ReadLine()
        # Need to go backwards just a bit because the ReadLine() consumes the CRLF/LF ending
        $sr.BaseStream.Seek($line.Length, "Begin")
        $sr.DiscardBufferedData()
        $curr = ''
        do {
            $prev = $curr
            $curr = $sr.Read()
        } until ($curr -in 10, -1)
        if ($prev -eq 13) { $newlineLength = 2 } else { $newlineLength = 1 }
        $position -= $newlineLength
        #endregion CRLF

        $sr.BaseStream.Seek(($position + $newlineLength), "Begin")
        $sr.DiscardBufferedData()
        # When updating from a location in the file, you should move back a bit and read to the end of the line.
        # This ensures that reading only begins on a newline, regardless of what position you specify.
        if ($position -gt 6) {
            $sr.BaseStream.Seek(($position + $newlineLength - 6), "Begin")
            $sr.DiscardBufferedData()
            $null = $sr.ReadLine()
        }

        $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
        $start = Get-Date
        $activity = "Indexing"
        Write-Progress -Activity $activity -Status "Beginning read" -PercentComplete 0
        do {
            if ($stopwatch.Elapsed.TotalSeconds -gt 10) {
                $stopwatch.Restart()
                $percent = ($sr.BaseStream.Position / $sr.BaseStream.Length * 100)
                $elapsed = ((Get-Date) - $start).TotalSeconds
                $writeProgressSplat = @{
                    Activity         = $activity
                    Status           = "Rowcount: $($this.WhereGroup.Rows.Count)"
                    CurrentOperation = "(Reading byte number $($sr.BaseStream.Position))"
                    PercentComplete  = $percent
                    SecondsRemaining = $elapsed / $percent * 100 - $elapsed
                }
                Write-Progress @writeProgressSplat
            }
            $line = $sr.ReadLine()
            # Validate that this is a good line and also extract the keys from the line via capture groups
            if ($line -match $regex) {
                $matched, $value = $line -split "(?<=$regex)", 0, "ExplicitCapture"
                $row = $this.WhereGroup.NewRow()
                $row.name = $matches.Key
                $row.position = $position + $matched.Length + $newlineLength
                if ($this.CaptureGroup) {
                    foreach ($capturegroup in $this.CaptureGroup) {
                        if ($value[-1] -match $capturegroup) {
                            foreach ($capture in $matches.GetEnumerator().where{ $_.Name -ne 0 }) {
                                $name = $capture.Name
                                $row.$name = $capture.Value
                            }
                        }
                    }
                }
                if ($existingRow = $this.WhereGroup.Rows.Find($row.name)) {
                    $existingRow.Delete()
                }
                $this.WhereGroup.Rows.Add($row)
                $position += $this.Encoding.GetByteCount($line) + $newlineLength
            } else {
                throw "Line does not contain a key/value store! Position $position, line: $line"
            }
        } until ($sr.EndOfStream)
        $this.WhereGroup.TableName = $sr.BaseStream.Position
        $sr.Dispose()
    }
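
    # Incremental update sketch (assumes the file has already been indexed once):
    # $this.WhereGroup.TableName keeps the stream position reached by the last pass,
    # so after appending with Set() a caller can resume from there instead of
    # re-reading the whole file:
    #   $table.UpdateIndex([bigint]$table.WhereGroup.TableName)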

    hidden SetIndex([string]$Key, [bigint]$Offset) {
        $this.Index[$Key] = $Offset
    }

    hidden GetBom() {
        $sr = $this.OpenReader()
        $this.GetBom($sr)
        $sr.Dispose()
    }

    hidden GetBom([System.IO.StreamReader]$sr) {
        # Check to see if the file starts with a BOM and, if it does, increase the position by the BOM length.
        # Works for up to 6 bytes of BOM.
        $items = 1..4 | & { process { $sr.Read() } }
        $sr.BaseStream.Seek(0, "Begin")
        $sr.DiscardBufferedData()
        $buffer = [byte[]]::new(10)
        $null = $sr.BaseStream.Read($buffer, 0, $buffer.Length)
        # Turn both arrays into space separated lists that regex can use for comparison and cleanup,
        # then convert back to an array to see how many bytes belong to the BOM
        $this.BomLength = ([string]$buffer -replace " ?$items.*" -split " " | Where-Object { $_ }).Count
    }

    hidden [System.IO.StreamReader] OpenReader() {
        [System.IO.FileStream]$fs = [System.IO.File]::Open($this.Path, "Open", "Read", "ReadWrite")
        return [System.IO.StreamReader]::new($fs, $this.Encoding)
    }

    hidden [System.IO.StreamWriter] OpenWriter() {
        $sw = [System.IO.StreamWriter]::new($this.Path, $true, $this.Encoding)
        $sw.AutoFlush = $true
        return $sw
    }

    [object] Get([string[]]$Name) {
        if ($this.WhereGroup) {
            $sr = $this.OpenReader()
            $results = [ordered]@{}
            foreach ($item in $Name) {
                $results[$item] = $this.Get(([bigint]($this.WhereGroup.Rows.Find($item)).position), $sr)
            }
            $sr.Dispose()
            if ($results.Keys.Count -eq 1) {
                return $results.Values[0]
            } else {
                return $results
            }
        } else {
            Write-Warning "No index found. Reverting to file scan"
            return $this.Get($Name, $true)
        }
    }

    [object] Get([hashtable]$Hashtable) {
        return $this.Get($Hashtable.Keys)
    }

    [object] Get([bigint]$Position) {
        $sr = $this.OpenReader()
        $out = $this.Get($Position, $sr)
        $sr.Dispose()
        return $out
    }

    hidden [object] Get([bigint]$Position, [System.IO.StreamReader]$sr) {
        try {
            $sr.BaseStream.Seek($Position, "Begin") | Out-Null
            $sr.DiscardBufferedData()
            $line = $sr.ReadLine()
            try {
                return $line | ConvertFrom-Json
            } catch {
                Write-Warning "Converting from JSON failed for this string: $line"
                return $line
            }
        } catch {
            throw $_
        }
    }

    hidden [object] Get([string[]]$Name, [switch]$NonIndexed) {
        $results = $null
        $regex = "^$($Name -join '=|^')="
        $results = [System.IO.File]::ReadAllLines($this.Path) | & {
            begin { $res = @{} }
            process {
                if ($_ -match $regex) {
                    $matched = $name -like ($matches[0] -replace '=')
                    try {
                        $res.$matched = $_ -replace $matches[0] | ConvertFrom-Json
                    } catch {
                        Write-Warning "Converting from JSON failed for this string: $_"
                        $res.$matched = $_ -replace $matches[0]
                    }
                }
            }
            end { $res }
        }
        if ($null -eq $results) {
            return $results
        } elseif ($results.Keys.Count -eq 1) {
            return ($results.Values[0])
        } else {
            return $results
        }
    }

    [bigint] Max() {
        return $this.Max("Name")
    }

    [bigint] Max([string]$column) {
        if ($this.WhereGroup.Rows.Count) {
            return [System.Linq.Enumerable]::Max(([int[]]$this.WhereGroup.Rows.$column))
        } else {
            [int]$max = 0
            $sr = $this.OpenReader()
            $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
            $start = Get-Date
            $activity = "Running"
            Write-Progress -Activity $activity -Status "Beginning read" -PercentComplete 0
            do {
                if ($stopwatch.Elapsed.TotalSeconds -gt 5) {
                    $stopwatch.Restart()
                    $percent = ($sr.BaseStream.Position / $sr.BaseStream.Length * 100)
                    $elapsed = ((Get-Date) - $start).TotalSeconds
                    $writeProgressSplat = @{
                        Activity         = $activity
                        Status           = "Rowcount: $($this.WhereGroup.Rows.Count)"
                        CurrentOperation = "(Reading byte number $($sr.BaseStream.Position))"
                        PercentComplete  = $percent
                        SecondsRemaining = $elapsed / $percent * 100 - $elapsed
                    }
                    Write-Progress @writeProgressSplat
                }
                $num = $sr.ReadLine() -replace "\D.*" -as [int]
                if ($num -gt $max) { $max = $num }
            } until ($sr.EndOfStream)
            $sr.Dispose()
            return $max
        }
    }
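
    # Lookup sketch (hypothetical keys; assumes UpdateIndex() has already run):
    #   $table.Get('alpha')              # one key  -> the parsed value
    #   $table.Get(@('alpha', 'beta'))   # several  -> ordered hashtable keyed by name
    #   $table.Get(@{ alpha = 1 })       # hashtable overload only uses the keys
    #   $table.Max()                     # largest key, when the keys are numeric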

    Set([string]$Name, $Value) {
        if ($Name -match "=") {
            throw "Name must not contain '=' (You used name: '$Name')"
        }
        $sw = $this.OpenWriter()
        $note = & $this.Notes
        $overhead = "$Name=$note="
        $this.SetIndex($Name, ($sw.BaseStream.Length + $overhead.Length))
        try {
            # Values that already parse as JSON are written as-is; everything else is serialized
            $null = $Value | ConvertFrom-Json -ErrorAction Stop
            $sw.WriteLine("$overhead$Value")
        } catch {
            $sw.WriteLine("$overhead$($Value | ConvertTo-Json -Compress -Depth $this.JsonDepth)")
        }
        $sw.Dispose()
        # Add to index file if present?
    }

    Set([hashtable]$Hashtable) {
        # Sample:
        # @{
        #     "key1" = "value1"
        #     "key2" = "value2"
        # }
        $sw = $this.OpenWriter()
        $note = & $this.Notes
        if ($note -match "=") {
            throw "Note result must not contain '=' (Your output was: '$note')"
        }
        $Hashtable.GetEnumerator() | & {
            process {
                $name = $_.Name
                $value = $_.Value
                if ($name -match "=") {
                    throw "Name must not contain '=' (You used name: '$name')"
                }
                $overhead = "$name=$note="
                $this.SetIndex($name, ($sw.BaseStream.Length + $overhead.Length))
                try {
                    # Values that already parse as JSON are written as-is; everything else is serialized
                    $null = $value | ConvertFrom-Json -ErrorAction Stop
                    $sw.WriteLine("$overhead$value")
                } catch {
                    $sw.WriteLine("$overhead$($value | ConvertTo-Json -Compress -Depth $this.JsonDepth)")
                }
            }
        }
        $sw.Dispose()
        # Add to index if present.
        # Add to index file if present?
    }
}

function Resolve-Path_Force {
    <#
    .SYNOPSIS
        Calls Resolve-Path but works for files that don't exist.
    .NOTES
        From http://devhawk.net/blog/2010/1/22/fixing-powershells-busted-resolve-path-cmdlet
    #>
    param (
        [string]$FileName
    )
    $FileName = Resolve-Path $FileName -ErrorAction SilentlyContinue -ErrorVariable _frperror
    if (-not $FileName) {
        $FileName = $_frperror[0].TargetObject
    }
    return $FileName
}
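
# Configuration sketch (hypothetical path and regex; the property names come from the
# StableTable class above). $Notes supplies the middle field of each record, and the
# named groups of $CaptureGroup become extra index columns filled from the JSON
# portion of each line during UpdateIndex():
#   $table = New-StableTable -Path ./users.stbl
#   $table.Notes = { [System.DateTimeOffset]::Now.ToString('o') }
#   $table.CaptureGroup = [regex]'"department":"(?<department>[^"]*)"'
#   $table.Set(@{ u1 = @{ department = 'Sales' }; u2 = @{ department = 'IT' } })
#   $table.UpdateIndex()
#   $table.WhereGroup.Rows | Where-Object department -eq 'Sales'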