# Public/Compare-FileShareIntegrity.ps1

function Compare-FileShareIntegrity {
    <#
    .SYNOPSIS
        Compares file integrity between the same Azure file share on two storage accounts.
    .DESCRIPTION
        Mounts the same file share from source and destination storage accounts and
        performs a tiered comparison:
          1. Existence — files present on one side but missing from the other.
          2. Size — files differing in size.
          3. Deep check (optional):
             -Partial — first + last 8 KB byte comparison (~97 % less I/O).
             -HashAll — full MD5 hashes on all same-size files.
             -SampleCount — full MD5 on a random sample of N files.

        Results are written to a timestamped CSV report and summarised on the console.
    .PARAMETER SourceAccountName
        Name of the source storage account.
    .PARAMETER DestinationAccountName
        Name of the destination storage account.
    .PARAMETER ShareName
        Name of the file share. If omitted, all shares on the source are compared.
    .PARAMETER SourceDriveLetter
        Drive letter for source mount. Default: X.
    .PARAMETER DestinationDriveLetter
        Drive letter for destination mount. Default: Y.
    .PARAMETER Partial
        Partial byte comparison on same-size files.
    .PARAMETER HashAll
        Full MD5 on all same-size files.
    .PARAMETER SampleCount
        Full MD5 on N randomly selected same-size files.
    .PARAMETER ThrottleLimit
        Maximum parallel deep-check workers. Default: 8.
    .PARAMETER LogDirectory
        Base directory for reports. Default: current directory.
    .EXAMPLE
        Compare-FileShareIntegrity -SourceAccountName 'sourceaccount' -DestinationAccountName 'destaccount' -ShareName 'finance'
    .EXAMPLE
        Compare-FileShareIntegrity -SourceAccountName 'sourceaccount' -DestinationAccountName 'destaccount' -Partial
    .NOTES
        Requires PowerShell 7+ (uses ForEach-Object -Parallel and multi-segment Join-Path).
        Only one of -Partial, -HashAll or -SampleCount may be supplied.
        With -WhatIf the existence/size tiers still run and the CSV is still written;
        only the deep check is skipped.
        When a single share is processed, any failure is re-thrown; when several
        shares are processed, a failure is recorded and the loop continues.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSAvoidUsingPositionalParameters', '', Justification = 'Join-Path positional parameters are idiomatic and readable')]
    param(
        [Parameter(Mandatory)]
        [string]$SourceAccountName,

        [Parameter(Mandatory)]
        [string]$DestinationAccountName,

        [Parameter()]
        [string]$ShareName,

        [Parameter()]
        [ValidatePattern('^[A-Za-z]$')]
        [string]$SourceDriveLetter = 'X',

        [Parameter()]
        [ValidatePattern('^[A-Za-z]$')]
        [string]$DestinationDriveLetter = 'Y',

        [Parameter()]
        [switch]$Partial,

        [Parameter()]
        [switch]$HashAll,

        [Parameter()]
        [ValidateRange(1, [int]::MaxValue)]
        [int]$SampleCount,

        [Parameter()]
        [ValidateRange(1, 64)]
        [int]$ThrottleLimit = 8,

        [Parameter()]
        [string]$LogDirectory = (Get-Location).Path
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── Private helpers ──────────────────────────────────────────────────────
    # Defined once here rather than inside the per-share loop so they are not
    # re-created on every iteration.

    # Recursively enumerates every file under $Root and returns a case-insensitive
    # dictionary of relative path -> length in bytes. A length of -1 means the
    # size lookup threw for that file.
    # NOTE(review): if the lookup fails on BOTH sides the two -1 values compare
    # equal and the file is treated as "same size" — confirm this is acceptable.
    function Build-FileIndex {
        param([string]$Root, [string]$Label)
        $index = [System.Collections.Generic.Dictionary[string, long]]::new([System.StringComparer]::OrdinalIgnoreCase)
        $count = 0
        $enumOptions = [System.IO.EnumerationOptions]::new()
        $enumOptions.RecurseSubdirectories = $true
        $enumOptions.IgnoreInaccessible    = $true
        # None = do not skip hidden/system entries (the default options skip them).
        $enumOptions.AttributesToSkip      = [System.IO.FileAttributes]::None

        Write-Host "[$(Get-Timestamp)] Enumerating files on $Label..." -ForegroundColor Cyan
        foreach ($fullPath in [System.IO.Directory]::EnumerateFiles($Root, '*', $enumOptions)) {
            # $Root ends with a backslash, so the remainder is the share-relative path.
            $rel = $fullPath.Substring($Root.Length)
            try { $index[$rel] = ([System.IO.FileInfo]::new($fullPath)).Length }
            catch { $index[$rel] = -1 }
            $count++
            if ($count % 10000 -eq 0) { Write-Host " ... $count files enumerated" -ForegroundColor Gray }
        }
        Write-Host " Found $count file(s) on $Label." -ForegroundColor Green
        return $index
    }

    # Writes one fully-quoted CSV row matching the report header. Only the path and
    # the error text are escaped; the remaining fields are values generated by this
    # function (status keywords, numeric sizes, hex digests).
    # The error parameter is deliberately NOT called $Error: that name would shadow
    # PowerShell's automatic $Error variable inside the helper.
    function Write-CsvRow {
        param(
            [System.IO.StreamWriter]$Writer,
            [string]$RelativePath,
            [string]$Status,
            [string]$SourceSize   = '',
            [string]$DestSize     = '',
            [string]$SourceMD5    = '',
            [string]$DestMD5      = '',
            [string]$ErrorMessage = ''
        )
        $escaped    = $RelativePath.Replace('"', '""')
        $errEscaped = $ErrorMessage.Replace('"', '""')
        $Writer.WriteLine("`"$escaped`",`"$Status`",`"$SourceSize`",`"$DestSize`",`"$SourceMD5`",`"$DestMD5`",`"$errEscaped`"")
    }

    # ── Validate mutually exclusive switches ─────────────────────────────────
    $deepModes = @($Partial.IsPresent, $HashAll.IsPresent, ($SampleCount -gt 0)) | Where-Object { $_ }
    if (($deepModes | Measure-Object).Count -gt 1) {
        throw 'Only one of -Partial, -HashAll, or -SampleCount may be specified.'
    }
    $deepCheckMode = if ($Partial)        { 'Partial' }
                     elseif ($HashAll)     { 'HashAll' }
                     elseif ($SampleCount) { 'Sample'  }
                     else                  { 'None'    }

    # ── Pre-flight ───────────────────────────────────────────────────────────
    Assert-AzCliLogin | Out-Null

    if ($SourceDriveLetter -eq $DestinationDriveLetter) {
        throw 'Source and destination drive letters must be different.'
    }

    $srcDrive = "${SourceDriveLetter}:"
    $dstDrive = "${DestinationDriveLetter}:"
    foreach ($drive in @($srcDrive, $dstDrive)) {
        if (Test-Path "${drive}\") {
            throw "Drive '$drive' is already mounted. Choose a different drive letter or dismount it first."
        }
    }

    $sourceKey = Get-StorageAccountKey -AccountName $SourceAccountName
    $destKey   = Get-StorageAccountKey -AccountName $DestinationAccountName

    # ── Resolve share list ───────────────────────────────────────────────────
    if ($ShareName) {
        $sharesToProcess = @($ShareName)
    }
    else {
        Write-Host "[$(Get-Timestamp)] No -ShareName specified. Listing all shares on '$SourceAccountName'..." -ForegroundColor Cyan
        # NOTE(review): the account key is passed on the az command line and may be
        # visible in process listings — consider an environment-based credential.
        $sharesRaw = az storage share list --account-name $SourceAccountName --account-key $sourceKey --query '[].name' -o tsv 2>&1
        if ($LASTEXITCODE -ne 0) { throw "Failed to list shares: $sharesRaw" }
        # stderr was merged into the output above; drop those records and blank lines.
        $sharesToProcess = @($sharesRaw | Where-Object { $_ -and $_ -isnot [System.Management.Automation.ErrorRecord] })
        if ($sharesToProcess.Count -eq 0) { throw "No file shares found on '$SourceAccountName'." }
        Write-Host " Found $($sharesToProcess.Count) share(s): $($sharesToProcess -join ', ')" -ForegroundColor Green
    }

    $reportDir = Join-Path $LogDirectory 'logs' "compare-integrity_$(Get-Date -Format 'yyyyMMdd_HHmmss')"
    # The report directory is created even under -WhatIf so the CSV can be written.
    New-Item -ItemType Directory -Path $reportDir -Force -Confirm:$false -WhatIf:$false | Out-Null

    $grandIssues      = 0
    $failedShareCount = 0
    $shareResults     = [System.Collections.Generic.List[hashtable]]::new()

    # ── Process each share ───────────────────────────────────────────────────
    $shareCounter = 0
    foreach ($currentShare in $sharesToProcess) {
        $shareCounter++
        if ($sharesToProcess.Count -gt 1) {
            Write-Host "`n┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -ForegroundColor Magenta
            Write-Host "┃ Share $shareCounter of $($sharesToProcess.Count): $currentShare" -ForegroundColor Magenta
            Write-Host '┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━' -ForegroundColor Magenta
        }

        # Track what was actually acquired so the finally block only releases that.
        $mountedSource = $false
        $mountedDest   = $false
        $csvWriter     = $null

        try {
            Mount-SmbDrive -AccountName $SourceAccountName -AccountKey $sourceKey -ShareName $currentShare -DriveLetter $SourceDriveLetter
            $mountedSource = $true
            Mount-SmbDrive -AccountName $DestinationAccountName -AccountKey $destKey -ShareName $currentShare -DriveLetter $DestinationDriveLetter
            $mountedDest = $true

            $srcRoot = "${srcDrive}\"
            $dstRoot = "${dstDrive}\"

            # ── Build file indices ───────────────────────────────────────
            $srcIndex = Build-FileIndex -Root $srcRoot -Label 'source'
            $dstIndex = Build-FileIndex -Root $dstRoot -Label 'destination'

            # ── CSV report ───────────────────────────────────────────────
            $csvPath   = Join-Path $reportDir "${currentShare}_integrity-report.csv"
            $csvWriter = [System.IO.StreamWriter]::new($csvPath, $false, [System.Text.Encoding]::UTF8)
            $csvWriter.WriteLine('"RelativePath","Status","SourceSize","DestSize","SourceMD5","DestMD5","Error"')

            # ── Tier 1 & 2: Existence + Size ─────────────────────────────
            Write-Host "`n[$(Get-Timestamp)] Comparing $($srcIndex.Count + $dstIndex.Count) index entries..." -ForegroundColor Cyan

            $missingOnDestCount   = 0
            $missingOnSourceCount = 0
            $sizeMismatchCount    = 0
            $sizeMatchCandidates  = [System.Collections.Generic.List[string]]::new()

            foreach ($kvp in $srcIndex.GetEnumerator()) {
                $rel    = $kvp.Key
                $srcLen = $kvp.Value
                $dstLen = [long]0

                if ($dstIndex.TryGetValue($rel, [ref]$dstLen)) {
                    if ($srcLen -ne $dstLen) {
                        $sizeMismatchCount++
                        Write-CsvRow -Writer $csvWriter -RelativePath $rel -Status 'SizeMismatch' -SourceSize $srcLen -DestSize $dstLen
                    }
                    else {
                        # Same size on both sides: eligible for the optional deep check.
                        $sizeMatchCandidates.Add($rel)
                    }
                }
                else {
                    $missingOnDestCount++
                    Write-CsvRow -Writer $csvWriter -RelativePath $rel -Status 'MissingOnDestination' -SourceSize $srcLen
                }
            }

            foreach ($kvp in $dstIndex.GetEnumerator()) {
                if (-not $srcIndex.ContainsKey($kvp.Key)) {
                    $missingOnSourceCount++
                    Write-CsvRow -Writer $csvWriter -RelativePath $kvp.Key -Status 'MissingOnSource' -DestSize $kvp.Value
                }
            }

            Write-Host ''
            Write-Host " Missing on destination : $missingOnDestCount" -ForegroundColor $(if ($missingOnDestCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Missing on source : $missingOnSourceCount" -ForegroundColor $(if ($missingOnSourceCount -gt 0) { 'Yellow' } else { 'Green' })
            Write-Host " Size mismatch : $sizeMismatchCount" -ForegroundColor $(if ($sizeMismatchCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Same size : $($sizeMatchCandidates.Count)" -ForegroundColor Cyan

            # ── Tier 3: Deep check ───────────────────────────────────────
            # Concurrent collections: both are written from the parallel workers below.
            $counters = [System.Collections.Concurrent.ConcurrentDictionary[string, long]]::new()
            $counters['match'] = 0; $counters['mismatch'] = 0; $counters['error'] = 0; $counters['done'] = 0
            $csvQueue = [System.Collections.Concurrent.ConcurrentQueue[string]]::new()

            # Default (Partial / HashAll): every same-size file is checked.
            $deepCheckList = $sizeMatchCandidates

            switch ($deepCheckMode) {
                'None' {
                    $deepCheckList = [System.Collections.Generic.List[string]]::new()
                    Write-Host "`n[$(Get-Timestamp)] No deep-check mode specified. Use -Partial, -HashAll, or -SampleCount." -ForegroundColor Yellow
                }
                'Sample' {
                    $actualSample = [math]::Min($SampleCount, $sizeMatchCandidates.Count)
                    if ($actualSample -lt $sizeMatchCandidates.Count) {
                        # Fisher–Yates shuffle of the candidate indices, then take the
                        # first $actualSample entries as the random sample.
                        $random  = [System.Random]::new()
                        $indices = [int[]]::new($sizeMatchCandidates.Count)
                        for ($i = 0; $i -lt $indices.Length; $i++) { $indices[$i] = $i }
                        for ($i = $indices.Length - 1; $i -gt 0; $i--) {
                            $j = $random.Next($i + 1)
                            $tmp = $indices[$i]; $indices[$i] = $indices[$j]; $indices[$j] = $tmp
                        }
                        $sampledList = [System.Collections.Generic.List[string]]::new($actualSample)
                        for ($i = 0; $i -lt $actualSample; $i++) { $sampledList.Add($sizeMatchCandidates[$indices[$i]]) }
                        $deepCheckList = $sampledList
                    }
                }
            }

            $totalToCheck = $deepCheckList.Count

            if ($WhatIfPreference -and $deepCheckMode -ne 'None') {
                Write-Host "`n[$(Get-Timestamp)] WhatIf: Would deep-check $totalToCheck file(s)." -ForegroundColor Yellow
            }
            elseif ($totalToCheck -gt 0) {
                Write-Host "`n[$(Get-Timestamp)] Deep-checking $totalToCheck file(s) (mode=$deepCheckMode, ThrottleLimit=$ThrottleLimit)..." -ForegroundColor Cyan
                $deepCheckStart = [System.Diagnostics.Stopwatch]::StartNew()

                # Progress is reported every 2 % of the work, but never more often than
                # every 500 files. Computed once here instead of once per file.
                $progressInterval = [long][math]::Max(500, [math]::Floor($totalToCheck * 0.02))

                $deepCheckList | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel {
                    # Each worker runs in its own runspace; outer state arrives via $using:.
                    $rel      = $_
                    $srcRootP = $using:srcRoot
                    $dstRootP = $using:dstRoot
                    $ctrs     = $using:counters
                    $queue    = $using:csvQueue
                    $total    = $using:totalToCheck
                    $srcIdx   = $using:srcIndex
                    $verbose  = $using:VerbosePreference
                    $mode     = $using:deepCheckMode
                    $sw       = $using:deepCheckStart
                    $interval = $using:progressInterval

                    # Stream.Read may legally return fewer bytes than requested, so keep
                    # reading until the buffer is full or the stream ends. Emits the
                    # number of bytes actually read.
                    $fillBuffer = {
                        param($Stream, $Buffer)
                        $filled = 0
                        while ($filled -lt $Buffer.Length) {
                            $n = $Stream.Read($Buffer, $filled, $Buffer.Length - $filled)
                            if ($n -le 0) { break }
                            $filled += $n
                        }
                        $filled
                    }

                    $srcPath = Join-Path $srcRootP $rel
                    $dstPath = Join-Path $dstRootP $rel

                    try {
                        $isMatch = $false
                        $detail1 = ''
                        $detail2 = ''

                        if ($mode -eq 'Partial') {
                            $chunkSize = 8192
                            $srcInfo   = [System.IO.FileInfo]::new($srcPath)
                            $fileLen   = $srcInfo.Length

                            $srcStream = $null
                            $dstStream = $null
                            try {
                                # Opened inside the try so the source handle is released
                                # even when opening the destination throws.
                                $srcStream = [System.IO.File]::OpenRead($srcPath)
                                $dstStream = [System.IO.File]::OpenRead($dstPath)

                                # Head: first min(8 KB, file length) bytes.
                                $readLen = [int][math]::Min([long]$chunkSize, $fileLen)
                                $srcBuf  = [byte[]]::new($readLen)
                                $dstBuf  = [byte[]]::new($readLen)
                                $srcRead = & $fillBuffer $srcStream $srcBuf
                                $dstRead = & $fillBuffer $dstStream $dstBuf
                                # Differing byte counts mean one side is shorter than expected.
                                $headMatch = ($srcRead -eq $dstRead) -and [System.Linq.Enumerable]::SequenceEqual($srcBuf, $dstBuf)

                                # Tail: last 8 KB; skipped when the head already covered the file.
                                $tailMatch = $true
                                if ($headMatch -and $fileLen -gt $chunkSize) {
                                    $tailStart = [math]::Max(0, $fileLen - $chunkSize)
                                    $tailLen   = [int]($fileLen - $tailStart)
                                    $srcBuf2   = [byte[]]::new($tailLen)
                                    $dstBuf2   = [byte[]]::new($tailLen)
                                    $srcStream.Seek($tailStart, [System.IO.SeekOrigin]::Begin) | Out-Null
                                    $dstStream.Seek($tailStart, [System.IO.SeekOrigin]::Begin) | Out-Null
                                    $srcRead2  = & $fillBuffer $srcStream $srcBuf2
                                    $dstRead2  = & $fillBuffer $dstStream $dstBuf2
                                    $tailMatch = ($srcRead2 -eq $dstRead2) -and [System.Linq.Enumerable]::SequenceEqual($srcBuf2, $dstBuf2)
                                }
                                $isMatch = $headMatch -and $tailMatch
                            }
                            finally {
                                if ($dstStream) { $dstStream.Dispose() }
                                if ($srcStream) { $srcStream.Dispose() }
                            }
                        }
                        else {
                            # HashAll / Sample: full MD5 of both files. One hash instance
                            # is reused for both streams and always disposed.
                            $md5 = [System.Security.Cryptography.MD5]::Create()
                            try {
                                $srcStream = [System.IO.File]::OpenRead($srcPath)
                                try {
                                    $detail1 = [BitConverter]::ToString($md5.ComputeHash($srcStream)).Replace('-', '').ToLowerInvariant()
                                }
                                finally { $srcStream.Dispose() }

                                $dstStream = [System.IO.File]::OpenRead($dstPath)
                                try {
                                    $detail2 = [BitConverter]::ToString($md5.ComputeHash($dstStream)).Replace('-', '').ToLowerInvariant()
                                }
                                finally { $dstStream.Dispose() }
                            }
                            finally { $md5.Dispose() }

                            $isMatch = ($detail1 -eq $detail2)
                        }

                        $escaped = $rel.Replace('"', '""')
                        $size    = $srcIdx[$rel]
                        if ($isMatch) {
                            $ctrs.AddOrUpdate('match', 1, [Func[string, long, long]]{ param($k, $v) $v + 1 }) | Out-Null
                            # Matching files are only listed in the CSV when -Verbose is on.
                            if ($verbose -eq 'Continue') {
                                $queue.Enqueue("`"$escaped`",`"OK`",`"$size`",`"$size`",`"$detail1`",`"$detail2`",`"`"")
                            }
                        }
                        else {
                            $status = if ($mode -eq 'Partial') { 'ByteMismatch' } else { 'HashMismatch' }
                            $ctrs.AddOrUpdate('mismatch', 1, [Func[string, long, long]]{ param($k, $v) $v + 1 }) | Out-Null
                            $queue.Enqueue("`"$escaped`",`"$status`",`"$size`",`"$size`",`"$detail1`",`"$detail2`",`"`"")
                        }
                    }
                    catch {
                        # A per-file failure is recorded as a CheckError row; the run continues.
                        $ctrs.AddOrUpdate('error', 1, [Func[string, long, long]]{ param($k, $v) $v + 1 }) | Out-Null
                        $escaped = $rel.Replace('"', '""')
                        $errMsg  = $_.Exception.Message.Replace('"', '""')
                        $queue.Enqueue("`"$escaped`",`"CheckError`",`"`",`"`",`"`",`"`",`"$errMsg`"")
                    }

                    $done = $ctrs.AddOrUpdate('done', 1, [Func[string, long, long]]{ param($k, $v) $v + 1 })
                    if ($done % $interval -eq 0 -or $done -eq $total) {
                        $pct     = [math]::Round(($done / $total) * 100, 1)
                        $elapsed = $sw.Elapsed.ToString('hh\:mm\:ss')
                        Write-Host " [$elapsed] $done / $total ($pct%)" -ForegroundColor Gray
                    }
                }

                $deepCheckStart.Stop()

                # Workers only enqueue rows; the single StreamWriter is written from here.
                $csvLine = $null
                while ($csvQueue.TryDequeue([ref]$csvLine)) { $csvWriter.WriteLine($csvLine) }

                Write-Host ''
                Write-Host " Deep check elapsed : $($deepCheckStart.Elapsed.ToString('hh\:mm\:ss'))" -ForegroundColor Cyan
                Write-Host " Deep match : $($counters['match'])" -ForegroundColor Green
                Write-Host " Deep MISMATCH : $($counters['mismatch'])" -ForegroundColor $(if ($counters['mismatch'] -gt 0) { 'Red' } else { 'Green' })
                if ($counters['error'] -gt 0) {
                    Write-Host " Deep errors : $($counters['error'])" -ForegroundColor Yellow
                }
            }

            $csvWriter.Flush()
            $csvWriter.Dispose()
            $csvWriter = $null

            # ── Per-share summary ────────────────────────────────────────
            $deepMismatchFinal = if ($deepCheckMode -eq 'None' -or $WhatIfPreference) { 0 } else { $counters['mismatch'] }
            $deepErrorFinal    = if ($deepCheckMode -eq 'None' -or $WhatIfPreference) { 0 } else { $counters['error'] }
            $issueCount = $missingOnDestCount + $missingOnSourceCount + $sizeMismatchCount + $deepMismatchFinal + $deepErrorFinal

            Write-Host "`n════════════════════════════════════════════════════════════" -ForegroundColor Cyan
            Write-Host " INTEGRITY CHECK — '$currentShare'" -ForegroundColor Cyan
            Write-Host '════════════════════════════════════════════════════════════' -ForegroundColor Cyan
            Write-Host " Total issues : $issueCount" -ForegroundColor $(if ($issueCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Report : $csvPath" -ForegroundColor Cyan
            Write-Host ''

            $grandIssues += $issueCount
            $shareResults.Add(@{ ShareName = $currentShare; Issues = $issueCount; CsvPath = $csvPath; Error = $null })
        }
        catch {
            Write-Host "`n ✗ ERROR processing share '$currentShare': $($_.Exception.Message)" -ForegroundColor Red
            # Issues = -1 marks the share as failed in the grand summary.
            $failedShareCount++
            $shareResults.Add(@{ ShareName = $currentShare; Issues = -1; Error = $_.Exception.Message })
            # With a single share there is nothing else to continue with: surface the error.
            if ($sharesToProcess.Count -eq 1) { throw }
        }
        finally {
            if ($csvWriter) { try { $csvWriter.Dispose() } catch { Write-Debug "CSV writer dispose failed: $_" } }
            if ($mountedDest)   { Dismount-SmbDrive -DriveLetter $DestinationDriveLetter }
            if ($mountedSource) { Dismount-SmbDrive -DriveLetter $SourceDriveLetter }
        }
    }

    # ── Grand summary ────────────────────────────────────────────────────────
    if ($sharesToProcess.Count -gt 1) {
        Write-Host "`n╔════════════════════════════════════════════════════════════" -ForegroundColor Cyan
        Write-Host "║ GRAND SUMMARY — $($sharesToProcess.Count) shares processed" -ForegroundColor Cyan
        Write-Host '╚════════════════════════════════════════════════════════════' -ForegroundColor Cyan
        foreach ($sr in $shareResults) {
            if ($sr.Issues -eq -1) { Write-Host " $($sr.ShareName) — FAILED ($($sr.Error))" -ForegroundColor Red }
            else { Write-Host " $($sr.ShareName) — $($sr.Issues) issue(s)" -ForegroundColor $(if ($sr.Issues -gt 0) { 'Red' } else { 'Green' }) }
        }
        # Failed shares contribute no issue count, so a green "0" total would be
        # misleading: report them explicitly and force the total line red.
        if ($failedShareCount -gt 0) {
            Write-Host " Failed shares: $failedShareCount" -ForegroundColor Red
        }
        Write-Host " Total issues: $grandIssues" -ForegroundColor $(if ($grandIssues -gt 0 -or $failedShareCount -gt 0) { 'Red' } else { 'Green' })
        Write-Host " Reports in: $reportDir" -ForegroundColor Cyan
        Write-Host ''
    }
}