DuplicateFinder.ps1

class UniqueSize
{
    UniqueSize()
    {
        $this.Count=0
        $this.ListOfFiles=@()
        $this.hashOfFiles=@()
    }
    [int32]$FileSize
    [int32]$Count
    [System.Collections.Generic.List[System.Object]]$ListOfFiles
    [System.Collections.Generic.List[System.Object]]$hashOfFiles
}

#Generate a file list that grouped by size and removed where size is unique
#Generate a reference object for each size, and add file list and hashes
#Group by hash
#Remove those hashes and file that are unique
#Generate a new list based on unique hash instead of size.
#Ready to manage delete...

function FindDuplicates()
{
    
    #Generate a file list that gruped by size and removed where size is unique
    Write-Host "$(Get-Date -Format "dd.MM.yyyy-HH:mm:ss [1/3]>") Building file list"
    $FullListOfDuplicationGroups=Get-ChildItem -File -Recurse | Sort-Object {$_.Length} | Group-Object -Property Length | where-Object {$_.Count -gt 1}


    Write-Host "$(Get-Date -Format "dd.MM.yyyy-HH:mm:ss [2/3]>") Generate hash for potential duplication candidatas"
    $ListOfUniqueHashObjects=@()
    
    $GroupCounter=0
    foreach($nextFileGroup in $FullListOfDuplicationGroups)
    {
        
        Write-Progress -Id 1 -Activity "Generate Hash for each duplication candidates" -status "Group completed $GroupCounter" -percentComplete ($GroupCounter / $FullListOfDuplicationGroups.Count*100)
        $GroupCounter++
        #Generate an reference object for each file size, that contain all the
        [UniqueSize]$NewSize=[UniqueSize]::new()
        $NewSize.FileSize=$nextFileGroup.Name
        $NewSize.Count=$nextFileGroup.Count
        $NewSize.ListOfFiles+=$nextFileGroup.Group
        
        #Generate hash for files
        foreach($NextFileToHash in $NewSize.ListOfFiles)
        {
            $NewSize.hashOfFiles+=($NextFileToHash | Get-FileHash).hash
        }

        #Group by hash and generate new reference object for each invidual hash
        $NewUniqueHashes=$NewSize.hashOfFiles | Group-Object | Where-Object {$_.Count -gt 1}
        
        #If only one hash group exists just copy over the original $NewSize object,
        #If more than one hash exists, generate separate object for each.
        if($NewUniqueHashes.Count -eq 1)
        {
            $ListOfUniqueHashObjects+=$NewSize
        }
        else
        {
            foreach($nextHashGroup in $NewUniqueHashes)
            {
                [UniqueSize]$NewHashSize=[UniqueSize]::new()
                $NewHashSize.FileSize=$nextFileGroup.Name
            
            
                for([int]$i=0; $i -lt $NewSize.ListOfFiles.Count;$i++)
                {
                    if($NewSize.hashOfFiles[$i] -eq $nextHashGroup.Name)
                    {
                        $NewHashSize.ListOfFiles+=$NewSize.ListOfFiles[$i]
                        $NewHashSize.hashOfFiles+=$NewSize.hashOfFiles[$i]
                        $NewHashSize.Count++
                    }
                }
                $ListOfUniqueHashObjects+=$NewHashSize
            }
        }
        
    }
    Write-Progress -Id 1 -Activity "Generate Hash for each duplication candidates" -status "Completed" -percentComplete 100
    return $ListOfUniqueHashObjects
}
function CleanUpManual()
{
    param($ListOfDuplicatesGroups)
    $GroupCounter=0
    foreach($nextDuplicateGroup in $ListOfDuplicatesGroups)
    {
        
        $firstFile=$nextDuplicateGroup.ListOfFiles[0]
        $RestOfFiles=$nextDuplicateGroup.ListOfFiles | select -Skip 1

        Write-host "============================================================================================================="

        $FirstLineString="`n[0] {0,-20} {1} {2}" -f $firstFile.Name, $($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $firstFile.DirectoryName 
        Write-Host $FirstLineString -ForegroundColor Cyan

        $FileCounter=1
        foreach($nextFile in $RestOfFiles)
        {
            " [{3}] {0,-20} {1} {2}" -f $nextFile.Name, $($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFile.DirectoryName, $FileCounter
            $FileCounter++
        }

        Write-Host "`nWhich one would you like to keep? Please specify by Nr of the file. `nIf you do not define whichone and hit enter, de default will be kept"
        $ItIsNotANumber=$true
        Do
        {
            try
            {
                [int]$FileNr=Read-Host 
                if($FileCounter -gt  $FileNr)
                {
                    $ItIsNotANumber=$false
                }
                else
                {
                    "Please enter your choise again!"
                }
            }
            catch
            {
                "Please enter your choise again!"
            }
        }while($ItIsNotANumber)
        
        if($FileNr -eq 0)
        {
            foreach($nextFileToDelete in $RestOfFiles)
            {
                "This has been deleted {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
                $nextFileToDelete | Remove-Item
            }
        }
        else
        {
                #Remove first Item
                "This has been deleted {0,-20} {1} {2}" -f $firstFile.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $firstFile.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
                $firstFile | Remove-Item

                #Build a list without the file that will be kept
                $FileToNotDelete=$RestOfFiles[$($FileNr - 1)]
                $listToDelete=$RestOfFiles | where {$_ -ne $FileToNotDelete}

                #Remove rest of the files
                foreach($nextFileToDelete in $listToDelete)
                {
                    "This has been deleted {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
                    $nextFileToDelete | Remove-Item
                }
                
                
        }

        Write-Progress -Id 2 -Activity "Duplication groups been progressed" -status "Group completed $GroupCounter" -percentComplete ($GroupCounter / $ListOfDuplicatesGroups.Count*100)
        $GroupCounter++

    }
    Write-Progress -Id 2 -Activity "Duplication groups been progressed" -status "Completed" -percentComplete 100

    Write-host "============================================================================================================="
    Write-host " You can find summary of file that has been deleted in DeletedFilesLog.txt"
    Write-host "============================================================================================================="
    
}
function CleanUpAuto()
{
    param($ListOfDuplicatesGroups)
    $GroupCounter=0
    foreach($nextDuplicateGroup in $ListOfDuplicatesGroups)
    {
        
        $firstFile=$nextDuplicateGroup.ListOfFiles[0]
        $RestOfFiles=$nextDuplicateGroup.ListOfFiles | select -Skip 1

        $FirstLine="This has been kept {0,-20} {1} {2}" -f $firstFile.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $firstFile.DirectoryName 
        Write-host $FirstLine -ForegroundColor Cyan

        foreach($nextFileToDelete in $RestOfFiles)
        {
            "This has been deleted {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
            $nextFileToDelete | Remove-Item
        }
        Write-Progress -Id 3 -Activity "Duplication groups been progressed" -status "Group completed $GroupCounter" -percentComplete ($GroupCounter / $ListOfDuplicatesGroups.Count*100)
        $GroupCounter++
    }
    Write-Progress -Id 2 -Activity "Duplication groups been progressed" -status "Completed" -percentComplete 100

    Write-host "============================================================================================================="
    Write-host " You can find summary of file that has been deleted in DeletedFilesLog.txt"
    Write-host "============================================================================================================="
    
}

function CleanUpDefaultDir()
{
    param($ListOfDuplicatesGroups)

    #Collect readonly directory list
    $ReadOnlyDirectoryList=@()
    $InValidPath=$true
    do
    {
        Write-Host "`nPlease define read-only directories"
        try
        {
            $ReadOnlyDirectory=Read-Host
            if(Test-Path -PathType Container $ReadOnlyDirectory )
            {
                $lastChar=$ReadOnlyDirectory.Substring($($ReadOnlyDirectory.Length-1))
                if($lastChar -eq "\")
                {
                    $ReadOnlyDirectoryList+=$ReadOnlyDirectory+"*"
                }
                else
                {
                    $ReadOnlyDirectoryList+=$ReadOnlyDirectory+"\*"
                }

                $YesNoNotValid=$true
                do{
                    Write-host "Do you want to add another Directory?(y/n)"
                    $Answer=read-host
                    if($Answer -match "[yYnN]")
                    {
                        if($Answer -match "[nN]")
                        {
                            $InValidPath=$false   
                        }
                        $YesNoNotValid=$false
                    }
                    else
                    {
                        write-host "Pelase answer with y or n"
                    }
                }While($YesNoNotValid)
                
            }         
            else
            {
                Write-Host "Invalid Path"
            }
        }
        catch
        {
            Write-Host "Invalid path."
        }

        
    }While($InValidPath)


    $ReadOnlyDirectoryList | Format-Table


    $GroupCounter=0
    foreach($nextDuplicateGroup in $ListOfDuplicatesGroups)
    {
        
        $protectedFileList=@()
        $FilesToDelete=@()
        foreach($nextFile in $nextDuplicateGroup.ListOfFiles) 
        {
            $IsThisFileProtected=$false
            foreach($nextPath in $ReadOnlyDirectoryList)
            {
                if($($nextFile.DirectoryName.ToLower()+"\") -like $nextPath)
                {
                    $IsThisFileProtected=$true
                    
                }
            }
            if($IsThisFileProtected)
            {
                $protectedFileList+=$nextFile
            }
            else
            {
                $FilesToDelete+=$nextFile
            }
        }

        if($protectedFileList.count -gt 0)
        {
            foreach($nextFileToProtect in $protectedFileList)
            {
                $FirstLine="This has been kept {0,-20} {1} {2}" -f $nextFileToProtect.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToProtect.DirectoryName 
                Write-host $FirstLine -ForegroundColor Cyan
            }
            foreach($nextFileToDelete in $FilesToDelete)
            {
                "This has been deleted {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
                $nextFileToDelete | Remove-Item
            }
        }
        else
        {
            #Keep the first file in this case
        
        
            $firstFile=$nextDuplicateGroup.ListOfFiles[0]
            $RestOfFiles=$nextDuplicateGroup.ListOfFiles | select -Skip 1
            
            $FirstLine="This has been kept {0,-20} {1} {2}" -f $firstFile.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $firstFile.DirectoryName 
            Write-host $FirstLine -ForegroundColor Cyan
            
            foreach($nextFileToDelete in $RestOfFiles)
            {
                "This has been deleted {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append DeletedFilesLog.txt
                $nextFileToDelete | Remove-Item
            }
        }
        Write-Progress -Id 4 -Activity "Duplication groups been progressed" -status "Group completed $GroupCounter" -percentComplete ($GroupCounter / $ListOfDuplicatesGroups.Count*100)
        $GroupCounter++
    }
    Write-Progress -Id 2 -Activity "Duplication groups been progressed" -status "Completed" -percentComplete 100

    Write-host "============================================================================================================="
    Write-host " You can find summary of file that has been deleted in DeletedFilesLog.txt"
    Write-host "============================================================================================================="
    
}

function CleanUpMove()
{
    param($ListOfDuplicatesGroups)
    
    #get output folder from user and validate
    $IsItCorrectPath=$false
    do
    {
        $OutPutPath=Read-Host("Please define directory path where duplicated files can be moved")
    
        if($OutPutPath[$OutPutPath.Length-1] -ne "\")
        {
            $OutPutPath+="\"
        }    
        $CurrentFolder=(Get-Location).Path
        $CurrentFolder+="\"
        
        #the output folder must be a different folder than source folder.
        if($CurrentFolder -eq $OutPutPath)
        {
            write-host "Output folder must be different!"

        }
        else
        {
            $IsItCorrectPath=$true
        }
    }while(!$IsItCorrectPath)

####
    

    #Proceed file moves group by group
    $GroupCounter=0
    foreach($nextDuplicateGroup in $ListOfDuplicatesGroups)
    {
        #Separate the group for first item and rest of the list
        $firstFile=$nextDuplicateGroup.ListOfFiles[0]
        $RestOfFiles=$nextDuplicateGroup.ListOfFiles | select -Skip 1


        $FirstLine="This has been kept {0,-20} {1} {2}" -f $firstFile.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $firstFile.DirectoryName 
        Write-host $FirstLine -ForegroundColor Cyan

        #Move rest of the list
        foreach($nextFileToDelete in $RestOfFiles)
        {
            "This has been moved {0,-20} {1} {2}" -f $nextFileToDelete.Name,$($nextDuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."), $nextFileToDelete.DirectoryName | Tee-Object -Append MovedFilesLog.txt
            
            #Script will keep original folder structure on new location, so need to build up relative path based on
            #start location and current file location
            $nextFilePath=$OutPutPath+$($nextFileToDelete.FullName.SubString($CurrentFolder.Length))
            $NextFileDirectory=$nextFilePath.Substring(0,$nextFilePath.IndexOf($nextFilePath.Split("\")[$nextFilePath.Split("\").Length-1]))
            
            #Check if do directory exists and move
            if (!(Test-Path -path $NextFileDirectory)) {$temp=New-Item $NextFileDirectory -Type Directory }
            Move-Item $nextFileToDelete.fullname $nextFilePath

            
        }

        Write-Progress -Id 5 -Activity "Duplication groups been progressed" -status "Group completed $GroupCounter" -percentComplete ($GroupCounter / $ListOfDuplicatesGroups.Count*100)
        $GroupCounter++
    }
    Write-Progress -Id 2 -Activity "Duplication groups been progressed" -status "Completed" -percentComplete 100

    Write-host "============================================================================================================="
    Write-host " You can find summary of file that has been moved in MovedFilesLog.txt"
    Write-host "============================================================================================================="
####

    
}



function Find-FileDuplicates()
{
<#
    
.DESCRIPTION
   +---------------------------------------------------------------------------------------------------------------------+
   | Please make a backup before proceed or use DisplayOnly or ReturnObject features |
   +---------------------------------------------------------------------------------------------------------------------+
   This function will scan all files in current folder and sub folders and build a list about duplication in default mode.
   You can use switches to choose one of the following options to manage files:
    
   DisplayOnly / Manual / Auto / DefendedDirectory / Save / ReturnObject / MoveDuplication
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour DisplayOnly
   This is the default behaviour. Script will only scan and display result
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour Manual
   After scan, script will offer option to keep a selected file after every single duplication group.
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour Auto
   After scan, script will delete all duplication except first instance.
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour DefendedDirectory
   After scan, script will delete all duplication except copies that located in protected folder instance.
   In case of duplication where no instance found in protected folder, the first instance will be kept.
 
   Script will ask for protected folders after start of the script.
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour Save
   After scan, script will save result in an XML file.
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour ReturnObject
   After scan, script will return the Object list that can be used for further manipulation.
 
.EXAMPLE
   Find-FileDuplicates -AfterBehaviour MoveDuplication
   After scan, script will clean up automiticlly and keep one instance of it.
   Rest of the duplications will be moved to the target folder with relative path.
   Target folder will need to be provided on interactive way after script start.
 
#>

    
    param(
        [ValidateSet('DisplayOnly','Manual','Auto','DefendedDirectory', 'Save', 'ReturnObject',"MoveDuplication")]
        [Parameter(Mandatory=$false, HelpMessage="Option to choose what script will do after file scan.")]
        [string]$AfterBehaviour="DisplayOnly")
    

    $ListOfDuplicates=FindDuplicates

    Write-Host "$(Get-Date -Format "dd.MM.yyyy-HH:mm:ss [3/3]>") Clean Up"
    
    if($AfterBehaviour -in "Manual", "Auto", "DefendedDirectory")
    {
        $completed=$false
        do
        {
            Write-Host "=============================================================================================================" -ForegroundColor DarkYellow
            Write-Host " Backup before procceding is highly recommended, or if you are not sure try DisplayOnly or MoveDuplication " -ForegroundColor DarkYellow
            Write-Host "=============================================================================================================" -ForegroundColor DarkYellow

            $Confirmation=Read-Host("Do you want to continue?[y/n] ")
            if($Confirmation.ToLower() -eq "y")
            {
                $completed=$true
            }
            elseif($Confirmation.ToLower() -eq "n")
            {
                
                return
            }
        }while(!$completed)
    }


    if($AfterBehaviour -eq "DisplayOnly")
    {
        $ListOfDuplicates
    }
    if($AfterBehaviour -eq "ReturnObject")
    {
        return $ListOfDuplicates
    }
    if($AfterBehaviour -eq "Save")
    {
        Write-host "All object has been exported to FileDuplication.xml"
        return $ListOfDuplicates | Export-CliXML FileDuplication.xml
    }
    if($AfterBehaviour -eq "Manual")
    {
        CleanUpManual $ListOfDuplicates
    }
    if($AfterBehaviour -eq "Auto")
    {
        CleanUpAuto $ListOfDuplicates
    }
    if($AfterBehaviour -eq "DefendedDirectory")
    {
        CleanUpDefaultDir $ListOfDuplicates 
    }
    if($AfterBehaviour -eq "MoveDuplication")
    {
        CleanUpMove $ListOfDuplicates 
    }
}