public/get-deduplicatedReport.ps1

Function Get-deduplicatedReport{
    <#
        Author = "Jos Lieben (jos@lieben.nu)"
        CompanyName = "Lieben Consultancy"
        Copyright = "https://www.lieben.nu/liebensraum/commercial-use/"
         
        Parameters:
        -permissionsFilePath: the path to the new permissions file. Leave one empty to auto-detect both
    #>
        
    Param(
        [Parameter(Mandatory=$false)][String]$permissionsFilePath
    )

    if(!$permissionsFilePath){
        $reportFiles = Get-ChildItem -Path $global:octo.outputFolder -Filter "*.xlsx" | Where-Object { $_.Name -notlike "*delta*" }
        if($reportFiles.Count -lt 1){
            Write-Error "Less than 1 XLSX reports found in $($global:octo.outputFolder). Please run a scan first or make sure you set the output format to XLSX. Deduplication is not possible when scanning to CSV format." -ErrorAction Stop
        }
        $lastReportFile = $reportFiles | Sort-Object -Property LastWriteTime -Descending | Select-Object -First 1
        $permissionsFile = $lastReportFile
        Write-Host "Auto detected permissions file to deduplicate: $($permissionsFile.FullName)"
    }else{
        $permissionsFile = Get-Item -Path $permissionsFilePath
    }

    Write-Progress -Id 1 -Activity "Deduplicating $($permissionsFile.Name)" -Status "Loading sheet metadata......" -PercentComplete 0

    $tabNames = Get-ExcelSheetInfo -Path $permissionsFile.FullName | Select-Object -ExpandProperty Name

    Write-Host ""

    $count = 0
    foreach ($tabName in $tabNames) {
        $count++
        try{$percentComplete = (($count  / ($tabNames.Count)) * 100)}catch{$percentComplete = 0}
        Write-Progress -Id 1 -Activity "Deduplicating $($permissionsFile.Name)" -Status "Loading $tabName into memory..." -PercentComplete $percentComplete

        $tab = Import-Excel -Path $permissionsFile.FullName -WorksheetName $tabName -DataOnly
        Write-Host "Loaded $($tab.Count) rows from $tabName"
        if($tab.Count -eq 0){
            Write-Host "No data found in $tabName, skipping"
            continue
        }

        Write-Progress -Id 1 -Activity "Deduplicating $($permissionsFile.Name)" -Status "Processing $tabName $($tab.Count) rows" -PercentComplete $percentComplete

        $uniqueObjects = [System.Collections.Generic.HashSet[string]]::new()

        $tabDeduped = $tab | Where-Object {
            $hash = ($_ | ConvertTo-Json -Depth 1)
            $uniqueObjects.Add($hash)
        }

        $duplicateCount = $tab.Count - $tabDeduped.Count
        if($duplicateCount -eq 0){
            Write-Host "No duplicate rows found in $tabName"
            [System.GC]::Collect() 
            continue
        }else{
            Write-Progress -Id 1 -Activity "Deduplicating $($permissionsFile.Name)" -Status "Exporting $tabName $($tabDeduped.Count) rows" -PercentComplete $percentComplete
            Write-Host "$($tab.Count) reduced to $($tabDeduped.Count) rows in $tabName, writing to file..." 
            $tabDeduped | Export-Excel -Path $permissionsFile.FullName -WorksheetName $tabName -TableName $tabName -TableStyle Medium10 -AutoSize -ClearSheet
            [System.GC]::Collect() 
        }
        
    }
    
    Write-Progress -Id 1 -Activity "Deduplicating $($permissionsFile.Name)" -Completed
}