Functions/Find-HomoglyphsInRepo.ps1
<#
.SYNOPSIS
Scans the specified repo for homoglyphs

.DESCRIPTION
Scans the specified GitHub repo for homoglyphs. The file list comes from
Get-GitHubContentRecursively; each file whose name matches -FileType is
downloaded into a uniquely named scratch folder under -TempDir, scanned with
Get-HomoglyphsInFile, and then deleted again.

Find-HomoglyphsInRepo is slow. I wrote it for fun, not for practicality.
Use Get-HomoglyphsInFile after downloading the files locally for faster processing.

.EXAMPLE
Find-HomoglyphsInRepo -OwnerName azure -RepositoryName azure-powershell -FileType "*.PS1" -TempDir C:\temp -Predefined PowerShell

Gets all values as parsed by PowerShell AST

.EXAMPLE
$results = Find-HomoglyphsInRepo -OwnerName paulhcode -RepositoryName RecurringADChecks -TempDir C:\temp\ -Predefined PowerShell
Remove-Uninteresting $results

Finds all items of interest in the repo then finds the homoglyphs

.PARAMETER OwnerName
The name of the owner of the GitHub repository to scan

.PARAMETER RepositoryName
The name of the GitHub repository to scan

.PARAMETER FileType
Specify a subset of files to scan, for example "*.ps1". This is a wildcard
pattern compared (case-insensitively) against each file's leaf name.
Defaults to '*' (every file).

.PARAMETER Predefined
'PowerShell' or 'Text'. Forwarded unchanged to Get-HomoglyphsInFile, and only
when specified. NOTE(review): the exact meaning of each value is defined by
Get-HomoglyphsInFile, not here - confirm against that function.

.PARAMETER TempDir
An existing directory to download files to for processing. Defaults to the
system temporary directory. Downloads go into a scratch sub-folder that is
removed when the scan finishes, so existing files in TempDir are not touched.

.PARAMETER AccessToken
GitHub access token, specify it to decrease throttling by GitHub.
Must be at least 40 characters long.

.PARAMETER RemoveUninteresting
Removes any items that are not homoglyphs of other items in the data processed

.LINK
https://github.com/paulhcode
#>
Function Find-HomoglyphsInRepo {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, ValueFromPipelineByPropertyName = $true, Position = 0)]
        [string]
        $OwnerName, #= "PowerShell"

        [Parameter(Mandatory = $true, ValueFromPipelineByPropertyName = $true, Position = 1)]
        [string]
        $RepositoryName,

        [Parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, Position = 2)]
        [string]
        $FileType = '*',

        [ValidateSet('PowerShell', 'Text')]
        [string]
        $Predefined,

        [Parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, Position = 3)]
        [string]
        [ValidateScript({ Test-Path $_ -PathType Container })]
        $TempDir = [System.IO.Path]::GetTempPath(),

        # Classic tokens are 40 characters; newer GitHub token formats are longer,
        # so only a minimum length is enforced.
        [Parameter(Mandatory = $false, ValueFromPipelineByPropertyName = $true, Position = 4)]
        [string]
        [ValidateScript({ $_.Length -ge 40 })]
        $AccessToken,

        [switch]
        $RemoveUninteresting
    )

    # A process block is required for ValueFromPipelineByPropertyName to handle
    # every piped object rather than only the last one.
    process {
        # Forward the token to the listing call only when one was supplied.
        $listArgs = @{
            OwnerName      = $OwnerName
            RepositoryName = $RepositoryName
        }
        If ($AccessToken) { $listArgs['AccessToken'] = $AccessToken }

        # Honour -FileType: keep only entries whose leaf name matches the wildcard.
        # Entries are treated as strings (download locations), matching how they
        # are handed to Split-Path and the downloader below.
        $ListOfFiles = @(
            Get-GitHubContentRecursively @listArgs |
                Where-Object { $_ -and ((Split-Path -Path "$_" -Leaf) -like $FileType) }
        )

        # Forward -Predefined only when the caller actually specified it.
        $scanArgs = @{}
        If ($PSBoundParameters.ContainsKey('Predefined')) { $scanArgs['Predefined'] = $Predefined }

        # Download into a unique scratch folder so that same-named files that
        # already exist in TempDir are never overwritten or deleted.
        $workDir = Join-Path $TempDir ([guid]::NewGuid().ToString('N'))
        $null = New-Item -Path $workDir -ItemType Directory -ErrorAction Stop

        $RepoValues = New-Object 'System.Collections.Generic.List[object]'
        $total = $ListOfFiles.Count
        $count = 0

        try {
            ForEach ($file in $ListOfFiles) {
                $operation = If ($file.html_url) { $file.html_url } Else { "$file" }
                $percent = [int][math]::Floor($count / $total * 100)
                Write-Progress -Activity "Scanning $count of $total files" -PercentComplete $percent -Id 1 -CurrentOperation $operation
                $count++

                $destinationFile = Join-Path $workDir (Split-Path -Path "$file" -Leaf)
                try {
                    try {
                        # Invoke-WebRequest follows redirects by default and, unlike
                        # the 'curl' alias, behaves the same on every PowerShell edition.
                        Invoke-WebRequest -Uri "$file" -OutFile $destinationFile -UseBasicParsing -ErrorAction Stop
                    }
                    catch {
                        # A single failed download should not abort the whole scan.
                        Write-Warning "Skipping '$file': download failed: $($_.Exception.Message)"
                        continue
                    }

                    #Scan the file
                    #I should include a way to include additional metadata like the HTMLURL for the file to make it easier to find later
                    ForEach ($value in @(Get-HomoglyphsInFile -FullName $destinationFile @scanArgs)) {
                        $RepoValues.Add($value)
                    }
                }
                finally {
                    # Always remove the downloaded copy, even when the scan throws.
                    If (Test-Path -LiteralPath $destinationFile) {
                        Remove-Item -LiteralPath $destinationFile -Force
                    }
                }
            }
        }
        finally {
            Write-Progress -Activity 'Scanning files' -Id 1 -Completed
            Remove-Item -LiteralPath $workDir -Recurse -Force -ErrorAction SilentlyContinue
        }

        If ($RemoveUninteresting) {
            Remove-Uninteresting ($RepoValues.ToArray())
        }
        Else {
            $RepoValues.ToArray()
        }
    }
}