AdminToolbox.FortiWizard

2.21.0.2

Public/Convert-FAZLogToCSV.ps1

                                <#

    .SYNOPSIS

    Converts Fortinet FortiAnalyzer log files to CSV format.

    .DESCRIPTION

    This function processes large FortiAnalyzer log files and converts them to CSV format.

    It uses parallel processing with runspaces to efficiently handle large files and

    automatically determines the columns based on the log content.

    .PARAMETER InputFile

    The path to the FortiAnalyzer log file to be converted.

    .PARAMETER OutputFile

    The path where the converted CSV file should be saved.

    .PARAMETER scanlines

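    The number of lines, counted from the start of the file, to scan for headers. Default is 10.

    Different log entry types may carry different fields, so this scan collects the unique field names that become the CSV columns. Fields that first appear after the scanned lines are not included in the output; increase this value if columns are missing.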

    .EXAMPLE

    Convert-FAZLogToCSV -InputFile "input.log" -OutputFile "output.csv"

    Converts the specified FortiAnalyzer log file to CSV format.

#>

function Convert-FAZLogToCSV {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)][string]$InputFile,
        [Parameter(Mandatory = $true)][string]$OutputFile,
        [Parameter(Mandatory = $false)][system.int32]$scanlines = 10
    )

    # Overview:
    #   1. Scan the first $scanlines lines to collect the set of field names (CSV columns).
    #   2. Split the file into one chunk of lines per logical processor and convert each
    #      chunk to CSV rows in its own runspace.
    #   3. Write the header row, then each chunk's rows in file order, to $OutputFile.
    # Each input line is expected to look like: "key=value","key=value",...

    $RunspacePool = $null
    $writer = $null
    # Created before the try block so the finally block can always clean them up,
    # even when setup fails part-way through.
    $Jobs = [System.Collections.ArrayList]::new()
    $headers = @{}

    # An [int32] parameter is never $null; a non-positive value would select no header
    # lines at all, so fall back to the documented default.
    if ($scanlines -le 0) { $scanlines = 10 }

    # Passing the separator as string[] forces the String.Split(string[], StringSplitOptions)
    # overload, so the literal sequence "," is the delimiter on every .NET runtime.
    $separator = [string[]]@('","')

    try {
        Write-Verbose "Starting log conversion process"

        # .NET file APIs resolve relative paths against the process working directory,
        # not the current PowerShell location, so convert both paths to full paths first.
        $inputPath = (Resolve-Path -LiteralPath $InputFile -ErrorAction Stop).ProviderPath
        $outputPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($OutputFile)

        Write-Verbose "Configuring runspace pool..."
        # Environment.ProcessorCount needs no CIM/WMI query; clamp to 1 so the chunk-size
        # division below can never divide by zero.
        [int]$MaxThreads = [Math]::Max(1, [Environment]::ProcessorCount)
        $RunspacePool = [runspacefactory]::CreateRunspacePool(1, $MaxThreads)
        $RunspacePool.Open()

        # Count lines by streaming the file instead of loading every line into memory.
        $totalLines = [System.Linq.Enumerable]::Count([System.IO.File]::ReadLines($inputPath))
        [int]$ChunkSize = [Math]::Ceiling($totalLines / $MaxThreads)

        Write-Verbose "Scanning $scanlines lines for headers..."
        $headerScanLines = [System.IO.File]::ReadLines($inputPath) | Select-Object -First $scanlines
        foreach ($line in $headerScanLines) {
            $fields = $line.Trim('"').Split($separator, [System.StringSplitOptions]::RemoveEmptyEntries)
            foreach ($field in $fields) {
                $field = $field.Trim().Trim('"')
                if ([string]::IsNullOrWhiteSpace($field)) { continue }

                $equalIndex = $field.IndexOf('=')
                if ($equalIndex -gt 0) {
                    $key = $field.Substring(0, $equalIndex).Trim()
                }
                else {
                    # A field without a leading key is still registered as a column name.
                    # The workers only populate key=value fields, so such a column stays empty.
                    $key = $field.Trim('"').Trim()
                }

                # Substring(0, 1) would throw on an empty key (e.g. "   =value").
                if ($key.Length -eq 0) { continue }

                # Invariant casing keeps column names identical regardless of the session culture.
                $headers[$key.Substring(0, 1).ToUpperInvariant() + $key.Substring(1)] = $true
            }
        }

        # @() keeps this an array even when only one header was found.
        $headerArray = @($headers.Keys | Sort-Object)
        Write-Verbose "Found $($headerArray.Count) unique headers"

        # Worker: skips $StartLine lines, converts up to $LinesCount lines and returns
        # one quoted CSV row (string) per input line, columns ordered as $HeaderArray.
        $scriptBlock = {
            param($InputFile, $StartLine, $LinesCount, $HeaderArray)

            $separator = [string[]]@('","')
            $rows = [System.Collections.Generic.List[string]]::new([int]$LinesCount)
            $reader = [System.IO.StreamReader]::new($InputFile)
            try {
                # Advance to this worker's first line.
                for ($i = 0; $i -lt $StartLine; $i++) {
                    $null = $reader.ReadLine()
                }

                $processedLines = 0
                while ($processedLines -lt $LinesCount -and -not $reader.EndOfStream) {
                    $line = $reader.ReadLine()
                    $entry = @{}

                    $fields = $line.Trim('"').Split($separator, [System.StringSplitOptions]::RemoveEmptyEntries)
                    foreach ($field in $fields) {
                        $equalIndex = $field.IndexOf('=')
                        if ($equalIndex -le 0) { continue }

                        $key = $field.Substring(0, $equalIndex).Trim()
                        if ($key.Length -eq 0) { continue }

                        $value = $field.Substring($equalIndex + 1).Trim('"').Trim()
                        $entry[$key.Substring(0, 1).ToUpperInvariant() + $key.Substring(1)] = $value
                    }

                    $values = foreach ($header in $HeaderArray) {
                        # Missing fields become an empty string; embedded quotes are doubled
                        # so the quoted value remains valid CSV.
                        '"' + ([string]($entry[$header])).Replace('"', '""') + '"'
                    }
                    $rows.Add(($values -join ','))
                    $processedLines++
                }

                return $rows
            }
            finally {
                $reader.Dispose()
            }
        }

        Write-Verbose "Writing headers to output file..."
        # Open the output before dispatching work so an unwritable path fails early.
        $writer = [System.IO.StreamWriter]::new($outputPath)
        $writer.WriteLine($headerArray -join ',')

        Write-Verbose "Creating processing jobs..."
        Write-Verbose "Processing $totalLines lines in chunks of $ChunkSize"

        $startLine = 0
        $jobCount = 0
        while ($startLine -lt $totalLines) {
            $jobCount++
            Write-Verbose "Creating job $jobCount for lines $startLine to $($startLine + $ChunkSize)"

            $powershell = [powershell]::Create().AddScript($scriptBlock)
            $powershell.RunspacePool = $RunspacePool
            $null = $powershell.AddArgument($inputPath)
            $null = $powershell.AddArgument($startLine)
            $null = $powershell.AddArgument($ChunkSize)
            $null = $powershell.AddArgument($headerArray)

            $null = $Jobs.Add(@{
                    PowerShell = $powershell
                    Handle     = $powershell.BeginInvoke()
                })

            $startLine = $startLine + $ChunkSize
        }

        Write-Verbose "Processing results from $($Jobs.Count) jobs..."
        Write-Verbose "Subsequent jobs will be almost immediate after the first job completes."

        # Jobs are collected in creation order, which preserves the input line order.
        $processedJobs = 0
        foreach ($job in $Jobs) {
            try {
                $processedJobs++
                Write-Verbose "Processing results from job $processedJobs of $($Jobs.Count)"

                $results = $job.PowerShell.EndInvoke($job.Handle)
                # Writing row by row avoids emitting a blank line when a job returned nothing.
                foreach ($row in $results) {
                    $writer.WriteLine([string]$row)
                }

                # Surface non-terminating errors raised inside the worker instead of dropping them.
                foreach ($workerError in $job.PowerShell.Streams.Error) {
                    Write-Error -ErrorRecord $workerError
                }
            }
            finally {
                $job.PowerShell.Dispose()
                $results = $null
            }
        }

        Write-Verbose "Conversion process completed for $outputFile"
    }
    finally {
        Write-Verbose "Cleaning up resources..."

        if ($writer) { $writer.Dispose() }

        # Dispose any worker that was never collected because an earlier step failed.
        foreach ($job in $Jobs) {
            if ($job.PowerShell) { $job.PowerShell.Dispose() }
        }

        if ($RunspacePool) {
            $RunspacePool.Close()
            $RunspacePool.Dispose()
        }

        $Jobs.Clear()
        $headers.Clear()
        $headerArray = $null
    }
}