Public/Compare-FileShareIntegrity.ps1
|
function Compare-FileShareIntegrity {
    <#
    .SYNOPSIS
        Compares file integrity between the same Azure file share on two storage accounts.

    .DESCRIPTION
        Mounts the same file share from source and destination storage accounts and
        performs a tiered comparison:

          1. Existence - files present on one side but missing from the other.
          2. Size      - files differing in size.
          3. Deep check (optional):
               -Partial     - first + last 8 KB byte comparison (~97 % less I/O).
               -HashAll     - full MD5 hashes on all same-size files.
               -SampleCount - full MD5 on a random sample of N files.

        Results are written to a timestamped CSV report and summarised on the console.
        Rows for files that pass the deep check ("OK") are only written to the CSV
        when -Verbose is in effect; problems are always written.

        When -WhatIf is in effect the deep check is skipped (only the number of files
        that would be checked is printed); the existence/size tiers still run and the
        report is still written.

    .PARAMETER SourceAccountName
        Name of the source storage account.

    .PARAMETER DestinationAccountName
        Name of the destination storage account.

    .PARAMETER ShareName
        Name of the file share. If omitted, all shares on the source are compared.

    .PARAMETER SourceDriveLetter
        Drive letter for source mount. Default: X.

    .PARAMETER DestinationDriveLetter
        Drive letter for destination mount. Default: Y.

    .PARAMETER Partial
        Partial byte comparison on same-size files.

    .PARAMETER HashAll
        Full MD5 on all same-size files.

    .PARAMETER SampleCount
        Full MD5 on N randomly selected same-size files.

    .PARAMETER ThrottleLimit
        Maximum parallel deep-check workers. Default: 8.

    .PARAMETER LogDirectory
        Base directory for reports. Default: current directory.
        Reports are placed in '<LogDirectory>/logs/compare-integrity_<timestamp>/'.

    .EXAMPLE
        Compare-FileShareIntegrity -SourceAccountName 'sourceaccount' -DestinationAccountName 'destaccount' -ShareName 'finance'

    .EXAMPLE
        Compare-FileShareIntegrity -SourceAccountName 'sourceaccount' -DestinationAccountName 'destaccount' -Partial

    .NOTES
        Requires PowerShell 7+ (ForEach-Object -Parallel, Join-Path with additional child paths).
        Relies on helpers defined elsewhere in the module: Assert-AzCliLogin,
        Get-StorageAccountKey, Mount-SmbDrive, Dismount-SmbDrive and Get-Timestamp.
        -Partial only inspects the first and last 8 KB of each file, so differences in
        the middle of a file are not detected by that mode.
    #>
    [CmdletBinding(SupportsShouldProcess)]
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSAvoidUsingPositionalParameters', '', Justification = 'Join-Path positional parameters are idiomatic and readable')]
    param(
        [Parameter(Mandatory)]
        [string]$SourceAccountName,

        [Parameter(Mandatory)]
        [string]$DestinationAccountName,

        [Parameter()]
        [string]$ShareName,

        [Parameter()]
        [ValidatePattern('^[A-Za-z]$')]
        [string]$SourceDriveLetter = 'X',

        [Parameter()]
        [ValidatePattern('^[A-Za-z]$')]
        [string]$DestinationDriveLetter = 'Y',

        [Parameter()]
        [switch]$Partial,

        [Parameter()]
        [switch]$HashAll,

        [Parameter()]
        [ValidateRange(1, [int]::MaxValue)]
        [int]$SampleCount,

        [Parameter()]
        [ValidateRange(1, 64)]
        [int]$ThrottleLimit = 8,

        [Parameter()]
        [string]$LogDirectory = (Get-Location).Path
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── Validate mutually exclusive switches ─────────────────────────────────
    $deepModes = @($Partial.IsPresent, $HashAll.IsPresent, ($SampleCount -gt 0)) | Where-Object { $_ }
    if (($deepModes | Measure-Object).Count -gt 1) {
        throw 'Only one of -Partial, -HashAll, or -SampleCount may be specified.'
    }

    $deepCheckMode = if ($Partial) { 'Partial' }
    elseif ($HashAll) { 'HashAll' }
    elseif ($SampleCount) { 'Sample' }
    else { 'None' }

    # ── Pre-flight ───────────────────────────────────────────────────────────
    Assert-AzCliLogin | Out-Null

    if ($SourceDriveLetter -eq $DestinationDriveLetter) {
        throw 'Source and destination drive letters must be different.'
    }

    $srcDrive = "${SourceDriveLetter}:"
    $dstDrive = "${DestinationDriveLetter}:"

    foreach ($drive in @($srcDrive, $dstDrive)) {
        if (Test-Path "${drive}\") {
            throw "Drive '$drive' is already mounted. Choose a different drive letter or dismount it first."
        }
    }

    $sourceKey = Get-StorageAccountKey -AccountName $SourceAccountName
    $destKey   = Get-StorageAccountKey -AccountName $DestinationAccountName

    # ── Resolve share list ───────────────────────────────────────────────────
    if ($ShareName) {
        $sharesToProcess = @($ShareName)
    }
    else {
        Write-Host "[$(Get-Timestamp)] No -ShareName specified. Listing all shares on '$SourceAccountName'..." -ForegroundColor Cyan
        $sharesRaw = az storage share list --account-name $SourceAccountName --account-key $sourceKey --query '[].name' -o tsv 2>&1
        if ($LASTEXITCODE -ne 0) {
            throw "Failed to list shares: $sharesRaw"
        }
        # stderr was merged into the stream above; drop those records and blank lines.
        $sharesToProcess = @($sharesRaw | Where-Object { $_ -and $_ -isnot [System.Management.Automation.ErrorRecord] })
        if ($sharesToProcess.Count -eq 0) {
            throw "No file shares found on '$SourceAccountName'."
        }
        Write-Host " Found $($sharesToProcess.Count) share(s): $($sharesToProcess -join ', ')" -ForegroundColor Green
    }

    $reportDir = Join-Path $LogDirectory 'logs' "compare-integrity_$(Get-Date -Format 'yyyyMMdd_HHmmss')"
    # The report directory is created even under -WhatIf because the tier 1/2 report is still written.
    New-Item -ItemType Directory -Path $reportDir -Force -Confirm:$false -WhatIf:$false | Out-Null

    $grandIssues      = 0
    $failedShareCount = 0
    $shareResults     = [System.Collections.Generic.List[hashtable]]::new()

    # ── Private helpers (defined once, not once per share) ───────────────────

    # Recursively enumerates $Root and returns a case-insensitive map of
    # relative path -> file length. A length of -1 marks a file whose size could not be read.
    function Build-FileIndex {
        param([string]$Root, [string]$Label)

        $index = [System.Collections.Generic.Dictionary[string, long]]::new([System.StringComparer]::OrdinalIgnoreCase)
        $count = 0

        $enumOptions = [System.IO.EnumerationOptions]::new()
        $enumOptions.RecurseSubdirectories = $true
        $enumOptions.IgnoreInaccessible    = $true
        # Do not skip hidden/system files (the EnumerationOptions default would).
        $enumOptions.AttributesToSkip      = [System.IO.FileAttributes]::None

        Write-Host "[$(Get-Timestamp)] Enumerating files on $Label..." -ForegroundColor Cyan
        foreach ($fullPath in [System.IO.Directory]::EnumerateFiles($Root, '*', $enumOptions)) {
            $rel = $fullPath.Substring($Root.Length)
            try {
                $index[$rel] = ([System.IO.FileInfo]::new($fullPath)).Length
            }
            catch {
                $index[$rel] = -1
            }
            $count++
            if ($count % 10000 -eq 0) {
                Write-Host " ... $count files enumerated" -ForegroundColor Gray
            }
        }
        Write-Host " Found $count file(s) on $Label." -ForegroundColor Green
        return $index
    }

    # Writes one fully-quoted CSV row. Only the path and the error text are escaped,
    # the remaining fields are generated by this function and never contain quotes.
    # (The last parameter is deliberately not called 'Error' to avoid shadowing the
    # automatic $Error variable.)
    function Write-CsvRow {
        param(
            [System.IO.StreamWriter]$Writer,
            [string]$RelativePath,
            [string]$Status,
            [string]$SourceSize,
            [string]$DestSize,
            [string]$SourceMD5,
            [string]$DestMD5,
            [string]$ErrorText
        )
        $escaped    = $RelativePath.Replace('"', '""')
        $errEscaped = $ErrorText.Replace('"', '""')
        $Writer.WriteLine("`"$escaped`",`"$Status`",`"$SourceSize`",`"$DestSize`",`"$SourceMD5`",`"$DestMD5`",`"$errEscaped`"")
    }

    # ── Process each share ───────────────────────────────────────────────────
    $shareCounter = 0
    foreach ($currentShare in $sharesToProcess) {
        $shareCounter++
        if ($sharesToProcess.Count -gt 1) {
            Write-Host "`n┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -ForegroundColor Magenta
            Write-Host "┃ Share $shareCounter of $($sharesToProcess.Count): $currentShare" -ForegroundColor Magenta
            Write-Host '┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━' -ForegroundColor Magenta
        }

        $mountedSource = $false
        $mountedDest   = $false
        $csvWriter     = $null

        try {
            Mount-SmbDrive -AccountName $SourceAccountName -AccountKey $sourceKey -ShareName $currentShare -DriveLetter $SourceDriveLetter
            $mountedSource = $true
            Mount-SmbDrive -AccountName $DestinationAccountName -AccountKey $destKey -ShareName $currentShare -DriveLetter $DestinationDriveLetter
            $mountedDest = $true

            $srcRoot = "${srcDrive}\"
            $dstRoot = "${dstDrive}\"

            # ── Build file indices ───────────────────────────────────────
            $srcIndex = Build-FileIndex -Root $srcRoot -Label 'source'
            $dstIndex = Build-FileIndex -Root $dstRoot -Label 'destination'

            # ── CSV report ───────────────────────────────────────────────
            $csvPath   = Join-Path $reportDir "${currentShare}_integrity-report.csv"
            $csvWriter = [System.IO.StreamWriter]::new($csvPath, $false, [System.Text.Encoding]::UTF8)
            $csvWriter.WriteLine('"RelativePath","Status","SourceSize","DestSize","SourceMD5","DestMD5","Error"')

            # ── Tier 1 & 2: Existence + Size ─────────────────────────────
            Write-Host "`n[$(Get-Timestamp)] Comparing $($srcIndex.Count + $dstIndex.Count) index entries..." -ForegroundColor Cyan

            $missingOnDestCount   = 0
            $missingOnSourceCount = 0
            $sizeMismatchCount    = 0
            $sizeMatchCandidates  = [System.Collections.Generic.List[string]]::new()

            foreach ($kvp in $srcIndex.GetEnumerator()) {
                $rel    = $kvp.Key
                $srcLen = $kvp.Value
                $dstLen = [long]0

                if ($dstIndex.TryGetValue($rel, [ref]$dstLen)) {
                    if ($srcLen -ne $dstLen) {
                        $sizeMismatchCount++
                        Write-CsvRow -Writer $csvWriter -RelativePath $rel -Status 'SizeMismatch' -SourceSize $srcLen -DestSize $dstLen -SourceMD5 '' -DestMD5 '' -ErrorText ''
                    }
                    else {
                        $sizeMatchCandidates.Add($rel)
                    }
                }
                else {
                    $missingOnDestCount++
                    Write-CsvRow -Writer $csvWriter -RelativePath $rel -Status 'MissingOnDestination' -SourceSize $srcLen -DestSize '' -SourceMD5 '' -DestMD5 '' -ErrorText ''
                }
            }

            foreach ($kvp in $dstIndex.GetEnumerator()) {
                if (-not $srcIndex.ContainsKey($kvp.Key)) {
                    $missingOnSourceCount++
                    Write-CsvRow -Writer $csvWriter -RelativePath $kvp.Key -Status 'MissingOnSource' -SourceSize '' -DestSize $kvp.Value -SourceMD5 '' -DestMD5 '' -ErrorText ''
                }
            }

            Write-Host ''
            Write-Host " Missing on destination : $missingOnDestCount" -ForegroundColor $(if ($missingOnDestCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Missing on source : $missingOnSourceCount" -ForegroundColor $(if ($missingOnSourceCount -gt 0) { 'Yellow' } else { 'Green' })
            Write-Host " Size mismatch : $sizeMismatchCount" -ForegroundColor $(if ($sizeMismatchCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Same size : $($sizeMatchCandidates.Count)" -ForegroundColor Cyan

            # ── Tier 3: Deep check ───────────────────────────────────────
            # Thread-safe counters and row queue shared with the parallel workers.
            $counters = [System.Collections.Concurrent.ConcurrentDictionary[string, long]]::new()
            $counters['match']    = 0
            $counters['mismatch'] = 0
            $counters['error']    = 0
            $counters['done']     = 0
            $csvQueue = [System.Collections.Concurrent.ConcurrentQueue[string]]::new()

            $deepCheckList = $sizeMatchCandidates
            switch ($deepCheckMode) {
                'None' {
                    $deepCheckList = [System.Collections.Generic.List[string]]::new()
                    Write-Host "`n[$(Get-Timestamp)] No deep-check mode specified. Use -Partial, -HashAll, or -SampleCount." -ForegroundColor Yellow
                }
                'Sample' {
                    $candidateCount = $sizeMatchCandidates.Count
                    $actualSample   = [math]::Min($SampleCount, $candidateCount)
                    if ($actualSample -lt $candidateCount) {
                        # Partial Fisher-Yates shuffle: only the first $actualSample slots
                        # need to be randomised to obtain a uniform sample without replacement.
                        $random  = [System.Random]::new()
                        $indices = [int[]]::new($candidateCount)
                        for ($i = 0; $i -lt $candidateCount; $i++) { $indices[$i] = $i }

                        $sampledList = [System.Collections.Generic.List[string]]::new($actualSample)
                        for ($i = 0; $i -lt $actualSample; $i++) {
                            $j   = $random.Next($i, $candidateCount)
                            $tmp = $indices[$i]; $indices[$i] = $indices[$j]; $indices[$j] = $tmp
                            $sampledList.Add($sizeMatchCandidates[$indices[$i]])
                        }
                        $deepCheckList = $sampledList
                    }
                }
            }

            $totalToCheck = $deepCheckList.Count

            if ($WhatIfPreference -and $deepCheckMode -ne 'None') {
                Write-Host "`n[$(Get-Timestamp)] WhatIf: Would deep-check $totalToCheck file(s)." -ForegroundColor Yellow
            }
            elseif ($totalToCheck -gt 0) {
                Write-Host "`n[$(Get-Timestamp)] Deep-checking $totalToCheck file(s) (mode=$deepCheckMode, ThrottleLimit=$ThrottleLimit)..." -ForegroundColor Cyan
                $deepCheckStart = [System.Diagnostics.Stopwatch]::StartNew()

                $deepCheckList | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel {
                    $rel      = $_
                    $srcRootP = $using:srcRoot
                    $dstRootP = $using:dstRoot
                    $ctrs     = $using:counters
                    $queue    = $using:csvQueue
                    $total    = $using:totalToCheck
                    $srcIdx   = $using:srcIndex
                    $verbose  = $using:VerbosePreference
                    $mode     = $using:deepCheckMode
                    $sw       = $using:deepCheckStart

                    $srcPath = Join-Path $srcRootP $rel
                    $dstPath = Join-Path $dstRootP $rel

                    $increment = [Func[string, long, long]] { param($k, $v) $v + 1 }

                    # Stream.Read may legally return fewer bytes than requested, so keep
                    # reading until the buffer is full or the stream ends. Returns the
                    # number of bytes actually read.
                    $readFully = {
                        param($Stream, $Buffer)
                        $offset = 0
                        while ($offset -lt $Buffer.Length) {
                            $n = $Stream.Read($Buffer, $offset, $Buffer.Length - $offset)
                            if ($n -le 0) { break }
                            $offset += $n
                        }
                        return $offset
                    }

                    # Full-file MD5 as a lowercase hex string; stream and hasher are always released.
                    $hashFile = {
                        param([string]$Path)
                        $stream = [System.IO.File]::OpenRead($Path)
                        try {
                            $md5 = [System.Security.Cryptography.MD5]::Create()
                            try {
                                return [BitConverter]::ToString($md5.ComputeHash($stream)).Replace('-', '').ToLowerInvariant()
                            }
                            finally {
                                $md5.Dispose()
                            }
                        }
                        finally {
                            $stream.Dispose()
                        }
                    }

                    try {
                        $isMatch = $false
                        $detail1 = ''
                        $detail2 = ''

                        if ($mode -eq 'Partial') {
                            $chunkSize = 8192
                            $srcStream = $null
                            $dstStream = $null
                            try {
                                # Opened inside the try so a failure on the second open
                                # cannot leak the first handle.
                                $srcStream = [System.IO.File]::OpenRead($srcPath)
                                $dstStream = [System.IO.File]::OpenRead($dstPath)

                                $fileLen = $srcStream.Length
                                # Sizes were equal at index time; if they differ now the
                                # files cannot match and offsets would be meaningless.
                                if ($dstStream.Length -eq $fileLen) {
                                    $headLen = [int][math]::Min([long]$chunkSize, $fileLen)
                                    $srcHead = [byte[]]::new($headLen)
                                    $dstHead = [byte[]]::new($headLen)
                                    $srcRead = & $readFully $srcStream $srcHead
                                    $dstRead = & $readFully $dstStream $dstHead
                                    $headMatch = ($srcRead -eq $dstRead) -and
                                        [System.Linq.Enumerable]::SequenceEqual($srcHead, $dstHead)

                                    $tailMatch = $true
                                    if ($headMatch -and $fileLen -gt $chunkSize) {
                                        # fileLen > chunkSize here, so the tail is exactly one chunk
                                        # (it may overlap the head for files smaller than two chunks).
                                        $tailStart = $fileLen - $chunkSize
                                        $srcTail   = [byte[]]::new($chunkSize)
                                        $dstTail   = [byte[]]::new($chunkSize)
                                        $srcStream.Seek($tailStart, [System.IO.SeekOrigin]::Begin) | Out-Null
                                        $dstStream.Seek($tailStart, [System.IO.SeekOrigin]::Begin) | Out-Null
                                        $srcRead = & $readFully $srcStream $srcTail
                                        $dstRead = & $readFully $dstStream $dstTail
                                        $tailMatch = ($srcRead -eq $dstRead) -and
                                            [System.Linq.Enumerable]::SequenceEqual($srcTail, $dstTail)
                                    }
                                    $isMatch = $headMatch -and $tailMatch
                                }
                            }
                            finally {
                                if ($null -ne $dstStream) { $dstStream.Dispose() }
                                if ($null -ne $srcStream) { $srcStream.Dispose() }
                            }
                        }
                        else {
                            $detail1 = & $hashFile $srcPath
                            $detail2 = & $hashFile $dstPath
                            $isMatch = ($detail1 -eq $detail2)
                        }

                        $escaped = $rel.Replace('"', '""')
                        $size    = $srcIdx[$rel]

                        if ($isMatch) {
                            $ctrs.AddOrUpdate('match', 1, $increment) | Out-Null
                            # Matching files are only reported when -Verbose is in effect.
                            if ($verbose -eq 'Continue') {
                                $queue.Enqueue("`"$escaped`",`"OK`",`"$size`",`"$size`",`"$detail1`",`"$detail2`",`"`"")
                            }
                        }
                        else {
                            $status = if ($mode -eq 'Partial') { 'ByteMismatch' } else { 'HashMismatch' }
                            $ctrs.AddOrUpdate('mismatch', 1, $increment) | Out-Null
                            $queue.Enqueue("`"$escaped`",`"$status`",`"$size`",`"$size`",`"$detail1`",`"$detail2`",`"`"")
                        }
                    }
                    catch {
                        $ctrs.AddOrUpdate('error', 1, $increment) | Out-Null
                        $escaped = $rel.Replace('"', '""')
                        $errMsg  = $_.Exception.Message.Replace('"', '""')
                        $queue.Enqueue("`"$escaped`",`"CheckError`",`"`",`"`",`"`",`"`",`"$errMsg`"")
                    }

                    # Progress: every 2 % of the work (at least every 500 files) and on the last file.
                    $done     = $ctrs.AddOrUpdate('done', 1, $increment)
                    $interval = [math]::Max(500, [math]::Floor($total * 0.02))
                    if ($done % $interval -eq 0 -or $done -eq $total) {
                        $pct     = [math]::Round(($done / $total) * 100, 1)
                        $elapsed = $sw.Elapsed.ToString('hh\:mm\:ss')
                        Write-Host " [$elapsed] $done / $total ($pct%)" -ForegroundColor Gray
                    }
                }

                $deepCheckStart.Stop()

                # Workers only enqueue rows; the single StreamWriter is used from this thread alone.
                $csvLine = $null
                while ($csvQueue.TryDequeue([ref]$csvLine)) {
                    $csvWriter.WriteLine($csvLine)
                }

                Write-Host ''
                Write-Host " Deep check elapsed : $($deepCheckStart.Elapsed.ToString('hh\:mm\:ss'))" -ForegroundColor Cyan
                Write-Host " Deep match : $($counters['match'])" -ForegroundColor Green
                Write-Host " Deep MISMATCH : $($counters['mismatch'])" -ForegroundColor $(if ($counters['mismatch'] -gt 0) { 'Red' } else { 'Green' })
                if ($counters['error'] -gt 0) {
                    Write-Host " Deep errors : $($counters['error'])" -ForegroundColor Yellow
                }
            }

            $csvWriter.Flush()
            $csvWriter.Dispose()
            $csvWriter = $null

            # ── Per-share summary ────────────────────────────────────────
            $deepSkipped       = ($deepCheckMode -eq 'None') -or $WhatIfPreference
            $deepMismatchFinal = if ($deepSkipped) { 0 } else { $counters['mismatch'] }
            $deepErrorFinal    = if ($deepSkipped) { 0 } else { $counters['error'] }
            $issueCount = $missingOnDestCount + $missingOnSourceCount + $sizeMismatchCount + $deepMismatchFinal + $deepErrorFinal

            Write-Host "`n════════════════════════════════════════════════════════════" -ForegroundColor Cyan
            Write-Host " INTEGRITY CHECK — '$currentShare'" -ForegroundColor Cyan
            Write-Host '════════════════════════════════════════════════════════════' -ForegroundColor Cyan
            Write-Host " Total issues : $issueCount" -ForegroundColor $(if ($issueCount -gt 0) { 'Red' } else { 'Green' })
            Write-Host " Report : $csvPath" -ForegroundColor Cyan
            Write-Host ''

            $grandIssues += $issueCount
            $shareResults.Add(@{ ShareName = $currentShare; Issues = $issueCount; CsvPath = $csvPath })
        }
        catch {
            Write-Host "`n ✗ ERROR processing share '$currentShare': $($_.Exception.Message)" -ForegroundColor Red
            $failedShareCount++
            # Issues = -1 marks a share that could not be processed.
            $shareResults.Add(@{ ShareName = $currentShare; Issues = -1; Error = $_.Exception.Message })
            # With a single share there is nothing else to continue with, so surface the error.
            if ($sharesToProcess.Count -eq 1) { throw }
        }
        finally {
            if ($csvWriter) {
                try { $csvWriter.Dispose() } catch { Write-Debug "CSV writer dispose failed: $_" }
            }
            # Nested try/finally guarantees the source drive is dismounted even when
            # dismounting the destination drive throws; errors still propagate.
            try {
                if ($mountedDest) { Dismount-SmbDrive -DriveLetter $DestinationDriveLetter }
            }
            finally {
                if ($mountedSource) { Dismount-SmbDrive -DriveLetter $SourceDriveLetter }
            }
        }
    }

    # ── Grand summary ────────────────────────────────────────────────────────
    if ($sharesToProcess.Count -gt 1) {
        Write-Host "`n╔════════════════════════════════════════════════════════════" -ForegroundColor Cyan
        Write-Host "║ GRAND SUMMARY — $($sharesToProcess.Count) shares processed" -ForegroundColor Cyan
        Write-Host '╚════════════════════════════════════════════════════════════' -ForegroundColor Cyan

        foreach ($sr in $shareResults) {
            if ($sr.Issues -eq -1) {
                Write-Host " $($sr.ShareName) — FAILED ($($sr.Error))" -ForegroundColor Red
            }
            else {
                Write-Host " $($sr.ShareName) — $($sr.Issues) issue(s)" -ForegroundColor $(if ($sr.Issues -gt 0) { 'Red' } else { 'Green' })
            }
        }

        Write-Host " Total issues: $grandIssues" -ForegroundColor $(if ($grandIssues -gt 0) { 'Red' } else { 'Green' })
        # Failed shares contribute nothing to the issue total, so call them out explicitly.
        if ($failedShareCount -gt 0) {
            Write-Host " Failed shares: $failedShareCount" -ForegroundColor Red
        }
        Write-Host " Reports in: $reportDir" -ForegroundColor Cyan
        Write-Host ''
    }
}