Functions/GenXdev.FileSystem/Find-DuplicateFiles.ps1
################################################################################
<#
.SYNOPSIS
Find duplicate files by name and properties across specified directories.

.DESCRIPTION
Takes one or more directory paths, searches each path recursively for files,
then groups files by name and optionally by size and modified date. Returns
groups containing two or more duplicate files.

Paths may be passed as an argument or through the pipeline. A file that is
reachable through more than one of the supplied paths (for example when the
same directory is listed twice, or a directory and one of its subdirectories
are both listed) is collected only once, so it is never reported as a
duplicate of itself.

.PARAMETER Paths
One or more directory paths to search for duplicate files.

.PARAMETER DontCompareSize
Skip file size comparison when determining duplicates.

.PARAMETER DontCompareModifiedDate
Skip last modified date comparison when determining duplicates.

.OUTPUTS
One PSCustomObject per duplicate group, with a FileName property (the file
name shared by the group) and a Files property (the System.IO.FileInfo objects
in that group).

.EXAMPLE
Find-DuplicateFiles -Paths "C:\Folder1","D:\Folder2" -DontCompareSize

.EXAMPLE
Get-Item "C:\Folder1","D:\Folder2" | Find-DuplicateFiles
#>
function Find-DuplicateFiles {

    [CmdletBinding()]
    [Alias("fdf")]

    param(
        ###############################################################################
        [Parameter(
            Mandatory = $true,
            Position = 0,
            ValueFromPipeline = $true,
            ValueFromPipelineByPropertyName = $true,
            HelpMessage = "One or more directory paths to search for duplicates"
        )]
        [ValidateNotNullOrEmpty()]
        [string[]] $Paths,
        ###############################################################################
        [Parameter(
            Mandatory = $false,
            Position = 1,
            HelpMessage = "Skip file size comparison when grouping duplicates"
        )]
        [switch] $DontCompareSize,
        ###############################################################################
        [Parameter(
            Mandatory = $false,
            Position = 2,
            HelpMessage = "Skip last modified date comparison when grouping duplicates"
        )]
        [switch] $DontCompareModifiedDate
        ###############################################################################
    )

    begin {

        # helper function to generate the composite key used to group files;
        # two files are considered duplicates when their keys are equal
        function Get-FileKey([System.IO.FileInfo]$file) {

            # start with filename as base key
            $key = $file.Name

            # add size to key if size comparison is enabled
            if (-not $DontCompareSize) {
                $key += "|$($file.Length)"
            }

            # add modified date to key if date comparison is enabled;
            # round-trip ('o') format keeps full timestamp precision
            if (-not $DontCompareModifiedDate) {
                $key += "|$($file.LastWriteTimeUtc.ToString('o'))"
            }

            return $key
        }

        # initialize generic list for better performance with large collections
        $allFiles = [System.Collections.Generic.List[System.IO.FileInfo]]::new()

        # full paths already collected, so overlapping input directories do not
        # add the same physical file twice; ignore case where the platform uses
        # '\' as directory separator (Windows), compare exactly elsewhere
        if ([System.IO.Path]::DirectorySeparatorChar -eq '\') {
            $seenPaths = [System.Collections.Generic.HashSet[string]]::new(
                [System.StringComparer]::OrdinalIgnoreCase)
        }
        else {
            $seenPaths = [System.Collections.Generic.HashSet[string]]::new(
                [System.StringComparer]::Ordinal)
        }
    }

    process {

        # $Paths must be read here rather than in 'begin': pipeline-bound
        # parameter values are only available inside the process block, which
        # runs once per pipeline item (or once for a directly passed argument)
        foreach ($inputPath in $Paths) {

            # normalize the input path via the module's Expand-Path helper
            # (defined outside this file; presumably returns a full path)
            $path = Expand-Path $inputPath

            # verify directory exists before processing
            if ([System.IO.Directory]::Exists($path)) {

                Write-Verbose "Scanning directory: $path"

                # get all files using direct .NET IO methods for performance
                $foundFiles = [System.IO.Directory]::GetFiles(
                    $path, "*.*", [System.IO.SearchOption]::AllDirectories)

                foreach ($filePath in $foundFiles) {

                    # HashSet.Add returns $false when the path was seen before
                    if ($seenPaths.Add($filePath)) {
                        $null = $allFiles.Add([System.IO.FileInfo]::new($filePath))
                    }
                }
            }
            else {
                Write-Warning "Directory not found: $path"
            }
        }
    }

    end {

        # group files by composite key and return groups with duplicates
        $allFiles |
            Group-Object -Property { Get-FileKey $_ } |
            Where-Object { $_.Count -gt 1 } |
            ForEach-Object {

                # create custom object for each group of duplicates
                [PSCustomObject]@{
                    FileName = $_.Group[0].Name
                    Files    = $_.Group
                }
            }
    }
}
################################################################################