cmdlets/Show-TSan.ps1

<#
.Synopsis
Simplify TSan reports parsing
 
.Description
Parsing test output to retrieve a TSan report object collection
 
.Parameter Path
File or collection of files to be parsed for TSan reports.
 
.Parameter InputObject
Collection of strings to be parsed for TSan reports.
 
.Parameter NoHash
Avoid fuzz hashing the reports. Significant speed boost.
 
.Inputs
Sources of TSan logs.
 
.Outputs
Collection of report objects extracted from the inputs.
 
.Example
To parse several files it is advisable to use the -Path argument because it provides
parallelization and progress reporting.
 
PS> Show-TSan (gci t.*)
 
.Example
It is possible to feed files into the pipeline, but that disables parallelization.
 
PS> (gci t.*) | Show-TSan
 
.Example
To parse reports from pipeline:
 
PS> ctest -C Debug -V | Show-TSan
 
Any text that is not part of a report is discarded.
 
.Example
It is possible to speed up report processing if hashing is disabled:
 
PS> Show-TSan (gci t.*) -NoHash
 
.Example
Use fuzzy logic to check the most popular reported deadlock issue.
 
PS> $g = Show-TSan (gci) | ? type -NotMatch race | Group-Object -Property fuzzhash
PS> $g | sort -Property Count -Descending | select name, count -first 3
 
    Name Count
    ---- -----
    48 317
    73 62
    72 41
 
.Notes
Thread-Sanitizer has a tendency to duplicate the same report. In this case the MD5hash
would be the same and duplicates can be easily removed by doing:
 
PS> Show-TSan (gci t.*) | sort md5hash | Get-Unique -AsString | measure
#>

function Show-TSan {
    [Alias('sts')]
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true,
            ParameterSetName = 'File',
            HelpMessage = 'Enter one or more filenames',
            ValueFromPipelineByPropertyName=$true,
            Position = 0)]
        [Alias("FullName")]
        [ValidateScript({
            Test-Path -Path $_ -PathType Leaf
            })]
        [String[]] $Path,

        [Parameter(Mandatory=$true,
            ParameterSetName = 'Pipe',
            HelpMessage = 'Pipeline stream',
            ValueFromPipeline=$true)]
        [AllowEmptyString()]
        [AllowNull()]
        [String[]] $InputObject,

        [Parameter(Mandatory=$false,
            HelpMessage = "Don`'t add a Fuzzy hash member")]
        [Switch]
        $NoHash
    )

    Begin
    {
        # Collect every bound parameter except the two input sources, so that
        # nested Show-TSan calls inherit switches such as -NoHash or -Verbose.
        $common_params = @{}
        foreach($arg in $PSBoundParameters.GetEnumerator())
        {
            if($arg.Key -notin 'Path', 'InputObject')
            {
                $common_params[$arg.Key] = $arg.Value
            }
        }

        # True when the input was supplied as an argument (it is already bound
        # here); pipeline input is only bound later, in the Process block.
        $arguments = [bool]$Path -or [bool]$InputObject

        # Report currently being assembled (null when outside a report)
        $current = $null

        # Number of report headers seen that have not been closed by a footer
        $nesting = 0
    }

    Process
    {
        # Input given as arguments is handled in the End block
        if($arguments) { return }

        # A file reached us through the pipeline (bound by its FullName property):
        # parse its contents recursively and tag each report with the file name.
        if($Path)
        {
            Write-Verbose "Processing file from pipeline: $Path"
            Get-Content $Path | Show-TSan @common_params | ForEach-Object {
                $_.file = $Path | Split-Path -Leaf
                $_
            }
            return
        }

        # Text lines coming through the pipeline
        if($InputObject)
        {

            Write-Debug "Processing line: $_"

            # Check for report boundaries. Note that TSan reports may be interleaved; this is notified
            # via warning and the affected reports are dismissed. Use TSan flags to avoid this issue:
            # + SanitizerCommonFlags (https://github.com/google/sanitizers/wiki/SanitizerCommonFlags)
            # > log_path -> specifies an output file name without extension (the extension is going to be the process pid value)
            # + ThreadSanitizerFlags (https://github.com/google/sanitizers/wiki/ThreadSanitizerFlags)
            # > io_sync -> Controls level of synchronization implied by IO operations.
            if($_ -match "WARNING: ThreadSanitizer: (?<sort>[-\w\s-\(\)/]+) \(pid=(?<pid>\d+)\)")
            {
                Write-Debug "Above line is a header with type=$($Matches.sort) & pid=$($Matches.pid)"

                $already_open = $nesting -ne 0
                $nesting++

                if($already_open)
                {
                    Write-Warning "($nesting) Possible mixed report. Report header without closing the last one: $($Matches.pid)"
                    return
                }

                # create a new report object
                $current = [PSCustomObject]@{PSTypeName="eProsima.TSanReport.v1";type=$Matches.sort;pid=$Matches.pid;report=$_;file=$null}
                Add-Member -InputObject $current -MemberType ScriptMethod -Name ToString -Value {$this.report} -Force

                Write-Verbose "Detected report prolog as $($Matches.sort) $($Matches.pid)"
            }
            elseif($_ -match "SUMMARY: ThreadSanitizer: (?<sort>[-\w\s-\(\)/]+)")
            {
                # Keep the footer type now: $Matches may be overwritten by later matches
                $footer_type = $Matches.sort

                Write-Debug "Above line is a footer with type=$footer_type"

                $nesting--

                if($nesting -ne 0 -or $null -eq $current)
                {
                    # dismiss former report may be tainted
                    $current = $null
                    Write-Warning "($nesting) Possible mixed report. Report footer without a clear header matched:`n>>>> $_"

                    # A footer without any open header must not leave a negative balance,
                    # otherwise every following header would be rejected as "mixed".
                    if($nesting -lt 0) { $nesting = 0 }
                    return
                }

                if(!$NoHash)
                {
                    # mark as a type extension
                    $current.PSObject.TypeNames.Insert(0,"eProsima.TSanReport.v1#hashes")

                    # quick and dirty fuzzy hash, it depends on the kind of report (deadlocks have a larger variance than the data races)
                    # data races: admit little differences, about 100 chars
                    # otherwise:  admit differences of about 560 chars (two tweets)
                    # (-like is used instead of -match so $Matches is left untouched)
                    $divisor = if($current.type -like '*data race*') { 10000 } else { 50000 }

                    # sum of the character codes; a foreach statement is much cheaper
                    # than piping every single character through ForEach-Object
                    $fuzzhash = 0
                    foreach($char in $current.report.ToCharArray())
                    {
                        $fuzzhash += [int]$char
                    }
                    $fuzzhash /= $divisor

                    # let's quickly weight also the issue reported
                    $fuzzhash += 100 * $current.type.length

                    Add-Member -InputObject $current -NotePropertyName fuzzhash -NotePropertyValue ([int][Math]::floor($fuzzhash))

                    # actual hash of the report contents (required for Group-Object)
                    $stream = [System.IO.MemoryStream]::new([Text.Encoding]::UTF8.GetBytes($current.report))
                    try
                    {
                        $md5 = (Get-FileHash -Algorithm MD5 -InputStream $stream).Hash
                    }
                    finally
                    {
                        $stream.Dispose()
                    }
                    Add-Member -InputObject $current -NotePropertyName MD5hash -NotePropertyValue $md5

                    # profit from the actual hashing to improve .Net object management
                    Add-Member -InputObject $current -MemberType ScriptMethod -Name GetHashCode -Value {$this.md5hash.GetHashCode()} -Force
                }

                Write-Verbose "Detected report epilog as $footer_type"

                # Send to the pipeline and prepare the next one
                $res = $current
                $current = $null
                return $res
            }
            else
            {
                # Capture the report contents
                if($current)
                {
                    $current.report += "`n$_"

                    Write-Debug "keeping report line: $_"
                }
                else
                {
                    Write-Debug "line disposal: $_"
                }
            }
        }
    }

    End
    {
        # Pipeline input was already handled line by line in the Process block
        if(-not $arguments) { return }

        if($Path)
        {
            Write-Verbose "Processing files from Path argument: $Path"

            # Report progress information
            $cur = 0 # current element processed
            $pfunc = {
                Param([int]$processed)

                $pc = $processed/$Path.count*100
                Write-Progress -Activity "Parsing TSan Reports:" `
                               -Status ("{0:F2}%" -f $pc) `
                               -PercentComplete $pc
            }

            if($NoHash -or $PSVersionTable.PSVersion -lt [System.Version]"7.0")
            {
                # Serial processing
                return $Path | ForEach-Object {
                    Get-Item $_ | Show-TSan @common_params
                    & $pfunc (++$cur)
                }
            }
            else
            {
                # Parallel processing
                $job = $Path | Get-Item | ForEach-Object -Parallel {
                    $_ | Show-TSan @using:common_params
                } -AsJob -ThrottleLimit 10

                # workaround for core issue https://github.com/PowerShell/PowerShell/issues/17077
                # Every script method is recreated from its source text and the object is
                # emitted exactly once afterwards (emitting it from inside the loop would
                # output it once per script method).
                $fix = { # TODO: remove when fixed
                    foreach($name in ($_ | Get-Member -MemberType ScriptMethod).Name)
                    {
                        Add-Member -InputObject $_ -MemberType ScriptMethod -Name $name `
                                   -Value ([ScriptBlock]::Create($_.$name.Script)) -Force
                    }
                    $_
                }

                # Show progress while the job is still active. Any other state
                # (Completed, Failed, Stopped...) leaves the loop so a failing job
                # cannot hang the cmdlet.
                while($job.State -in 'NotStarted', 'Running')
                {
                    # Count the number of completed tasks and output the result at once
                    $done = $job.ChildJobs | Where-Object State -eq "Completed"
                    if($done)
                    {
                        $done | Receive-Job | ForEach-Object { & $fix }
                        & $pfunc (@($done).Count)
                    }

                    # avoid burning a core while polling
                    Start-Sleep -Milliseconds 100
                }

                # Retrieve all pending data
                Receive-Job $job -Wait -AutoRemoveJob | ForEach-Object { & $fix }
            }
        }
        else # InputObject
        {
            Write-Debug "Processing input from InputObject argument"
            # progress cannot be evaluated because Show-TSan calls require the input
            # received to contain complete reports
            return $InputObject | Show-TSan @common_params
        }
    }
}