Functions/Public/Search-URLScan.ps1

function Search-URLScan {
    <#
    .SYNOPSIS
        This function is used to perform a search query against URLScan.io

    .DESCRIPTION
        This function is used to perform a search query against URLScan.io.

        Either a raw query can be supplied with -Query, or a query is assembled from the individual
        filter parameters (joined with 'AND'). Results are fetched page by page using the
        'search_after' cursor until -Limit results have been collected, or until no further
        results are available when -ReturnAll is specified.

    .PARAMETER Query
        The ElasticSearch Query to send to URLScan.io, such as 'domain:mydomain.com'.
        A full list of available search parameters can be found here: https://urlscan.io/docs/search/
        The query is URL-encoded by this function before it is sent.

    .PARAMETER Domain
        Filter the results by domain. This filters by 'domain' by default, or 'page.domain' when -Strict is specified.

    .PARAMETER IP
        Filter the results by IP. This filters by 'ip' by default, or 'page.ip' when -Strict is specified.

    .PARAMETER Country
        Filter the results by Country. This filters by 'country' by default, or 'page.country' when -Strict is specified.

    .PARAMETER Server
        Filter the results by the HTTP 'Server' header. This filters by 'server' by default, or 'page.server' when -Strict is specified.

    .PARAMETER Hash
        Filter the results by the SHA256 hash of any HTTP response. This parameter accepts pipeline input by property name.

    .PARAMETER FileName
        Filter the results by the filename of any URL that was requested

    .PARAMETER ASN
        Filter the results by ASN Number. This filters by 'asn' by default, or 'page.asn' when -Strict is specified.

    .PARAMETER ASNName
        Filter the results by ASN Name. This filters by 'asnname' by default, or 'page.asnname' when -Strict is specified.

    .PARAMETER Strict
        Enable strict checking of filter parameters. By default, results will be returned when the filter has been found or called from anywhere within the requested URL.

    .PARAMETER Limit
        The quantity of results you want to return. Defaults to 100 when neither -Limit nor -ReturnAll is specified.

    .PARAMETER PageSize
        The number of results to return in a single request. This defaults to 100 for Free/Unregistered users. If you have a subscription, this can be increased in-line with your subscription allowance when specifying an API Key.

    .PARAMETER ReturnAll
        Return all results for your search. This could take an excessively long time and eat away at your API allowance, so use with caution.

    .PARAMETER RateLimitPause
        The -RateLimitPause parameter is used to define the minimum percentage that can be reached of the allowable rate limit before queries are paused.

    .PARAMETER Silent
        The -Silent parameter is used to silence the Write-Host messages returned during scans.

    .PARAMETER APIKey
        The -APIKey parameter enables you to specify an API Key if you have an account with URLScan.io. This will enable higher query limits and larger page sizes.
        This is only necessary if your API Key has not been saved using Set-URLScanConfiguration

    .EXAMPLE
        PS> Search-URLScan -Query 'task.url:google.com AND country:gb AND page.server:nginx' | Select-Object -ExpandProperty task | ft -AutoSize

        (94/94): URLScan.io Results Returned.

        visibility method domain apexDomain time uuid url
        ---------- ------ ------ ---------- ---- ---- ---
        public manual google.com google.com 5/16/2024 5:33:33 AM 42e9e5ec-e10e-441d-be9a-99865a2ae0ad https://google.com/amp/s/lido.community
        public api www.hvacrecruitment.com hvacrecruitment.com 4/15/2024 1:07:20 PM 4159106e-f034-47b0-8a66-5c0145cceb04 https://www.hvacrecruitment.com/helpdesk-recruitment-agency?source=google.com
        public api www1-google.com www1-google.com 4/15/2024 6:59:00 AM d307c542-1445-4dee-a7e2-1045fcd72d3b http://www1-google.com/
        public manual google.com google.com 3/31/2024 7:32:44 PM 72b7e643-7134-4563-a2a5-eb2621156117 https://google.com/amp/s/coinmarketcap.network
        public manual google.com google.com 3/28/2024 6:47:27 AM 4433dc67-2b4c-4b47-927d-6b36d2fe7534 https://google.com/amp/s/lido.fi.accelerator-program.support
        public manual google.com google.com 3/27/2024 6:17:43 AM d14d8e14-7ef8-442f-81c6-3e2ce5ce21e1 https://google.com/amp/s/lido.fi.accelerator-program.support
        public manual google.com google.com 3/23/2024 7:52:49 AM 7820fbc1-6c44-409b-a099-3d2d58d059f1 https://google.com/amp/s/t2m.io/abjriGb?Claim-500USDT-Voucher
        public manual google.com google.com 3/22/2024 8:16:39 PM 31b9a33b-b0cd-4cd5-83e5-1804ac1f8a95 https://google.com/amp/s/mailserver.promo?Claim-Voucher
        public manual google.com google.com 3/11/2024 11:35:33 AM 482b916d-57e8-4e3c-9748-c1d9d95f9880 https://google.com/amp/s/mailing-database.com
        public manual google.com google.com 2/28/2024 12:47:37 AM f03c344b-d2cc-4f38-93df-50b3a74e9465 https://google.com/amp/s/insights-ripple.com
        public manual google.com google.com 2/27/2024 9:15:42 PM 3299b0b9-e1e9-4f0a-b99e-f2d1833fc4dd https://google.com/amp/s/insights-ripple.com
        ...

    .EXAMPLE
        PS> $Results = Search-URLScan -Domain 'google.com' -Limit 300

        Query Size Exceeds Page Size 100. Enabling paging of results..
        (300/300): URLScan.io Results Returned..

        $Results.task | ft -AutoSize

        visibility method domain apexDomain time uuid url
        ---------- ------ ------ ---------- ---- ---- ---
        public api ym556.top ym556.top 16/06/2024 08:43:42 d1667410-dc7e-49a1-8d76-7f44860a99e1 http://ym556.top/
        public api mold-removal-en-blaster.today mold-removal-en-blaster.today 16/06/2024 08:43:40 67d2f955-bd05-4dc1-a1ea-93a0e7c5fc7a https://mold-removal-en-blaster.today/
        public api deberardfamilyfoundation.org deberardfamilyfoundation.org 16/06/2024 08:43:37 f7448a7f-39a1-49fd-a601-9a0016762561 http://deberardfamilyfoundation.org/
        public api mxcdonolxoeew.aloepsychology.com aloepsychology.com 16/06/2024 08:43:36 3e737098-6824-47f3-8f36-d48943e23aab https://mxcdonolxoeew.aloepsychology.com/
        public api toiturepereetfilsrenovationcouverture.ovh toiturepereetfilsrenovationcouverture.ovh 16/06/2024 08:43:36 20947fb7-7e4c-4f28-9305-11b5b6314f0d http://toiturepereetfilsrenovationcouverture.ovh/
        public api republicaustin.us republicaustin.us 16/06/2024 08:43:36 d87d1512-8f57-4ef2-914b-00a0ac51a154 https://republicaustin.us/
        public api satoi-electricalindustry.com satoi-electricalindustry.com 16/06/2024 08:43:35 42b8d37f-43d1-4dce-ac59-3bdecb0b0bf1 https://satoi-electricalindustry.com/
        public api esgsoftware729505.life esgsoftware729505.life 16/06/2024 08:43:35 d2943fa1-c166-40b2-8ac7-1dffa120b7cd https://esgsoftware729505.life/
        public automatic yandex.lpfatture.hearingaid1.xyz hearingaid1.xyz 16/06/2024 08:43:34 b5e74964-28d8-49f5-acc4-f9f2c905b990 https://yandex.lpfatture.hearingaid1.xyz/
        public automatic servicedesk.adra.org.ua adra.org.ua 16/06/2024 08:43:33 c42f91fa-bb40-4566-ba6a-625698a8edd4 https://servicedesk.adra.org.ua/
        public api ngis.co ngis.co 16/06/2024 08:43:32 03a6a935-c708-4fce-be4c-9086ae6cd808 http://ngis.co/
        public api steamcommunitiny.com steamcommunitiny.com 16/06/2024 08:43:32 422a6d64-5427-43ab-9faf-f0049d1b8f73 http://steamcommunitiny.com/
        public automatic oahlw.uberfreight311.xyz uberfreight311.xyz 16/06/2024 08:43:31 a39f6056-16ec-42f5-b529-ce67333b28cb https://oahlw.uberfreight311.xyz/
        public automatic sslvpn.bodymindsoulusa.com bodymindsoulusa.com 16/06/2024 08:43:30 2e18233d-3753-4548-8445-2b068b9fcdbb https://sslvpn.bodymindsoulusa.com/
        public automatic nalozhka.nalozhka.www.kwid9.24-hour-sewer-service2.xyz 24-hour-sewer-service2.xyz 16/06/2024 08:43:29 43a55c3b-22a6-4e0a-bf36-d8902fbfa062 https://nalozhka.nalozhka.www.kwid9.24-hour-sewer-service2.…
        ...

    .EXAMPLE
        ## Return All Results for selected query
        PS> $Results = Search-URLScan -Domain 'bbc.co.uk' -Strict -ReturnAll -PageSize 1000

        (9027/9027): All URLScan.io Results Returned.

    .OUTPUTS
        The objects found in the 'results' property of each URLScan.io search response.
        If a request fails part-way through paging, the results collected so far are returned.

    .FUNCTIONALITY
        URLScan.io
    #>

    param(
        [Parameter(ParameterSetName='Query',Mandatory=$true)]
        [String]$Query,
        [Parameter(ParameterSetName='Filters')]
        [String]$Domain,
        [Parameter(ParameterSetName='Filters')]
        [String]$IP,
        [Parameter(ParameterSetName='Filters')]
        [String]$Country,
        [Parameter(ParameterSetName='Filters')]
        [String]$Server,
        [Parameter(
            ParameterSetName='Filters',
            ValueFromPipelineByPropertyName = $true
        )]
        [String]$Hash,
        [Parameter(ParameterSetName='Filters')]
        [String]$Filename,
        [Parameter(ParameterSetName='Filters')]
        [String]$ASN,
        [Parameter(ParameterSetName='Filters')]
        [String]$ASNName,
        [Parameter(ParameterSetName='Filters')]
        [Switch]$Strict,
        [Int]$Limit,
        [Int]$PageSize = 100,
        [Switch]$ReturnAll,
        [Switch]$Silent,
        [String]$APIKey,
        [Int]$RateLimitPause = 5
    )

    begin {
        ## Set when parameter validation fails so that the process block does nothing.
        ## A bare 'break' must not be used here: outside of a loop it propagates up the
        ## call stack and can silently terminate the caller's loop or script.
        $Abort = $false

        if ($Limit -and $ReturnAll) {
            Write-Error "-Limit & -ReturnAll are mutually exclusive parameters."
            $Abort = $true
            return
        } elseif (!($Limit)) {
            $Limit = 100
        }

        $Headers = Get-URLScanHeaders -APIKey $($APIKey)

        ## Check if Default Page Size has been set
        if ($ENV:URLScanPageLimit -and $PageSize -eq 100) {
            $PageSize = $ENV:URLScanPageLimit
        }

        ## If Size > PageSize, enable paging of results
        if (($Limit -gt $PageSize) -or $ReturnAll) {
            $QuerySize = $PageSize
            if (!($ReturnAll)) {
                if (!($Silent)) { Write-Host "Query Size Exceeds Page Size $($PageSize). Enabling paging of results.." -ForegroundColor Blue}
            }
        } else {
            $QuerySize = $Limit
        }
    }

    process {
        if ($Abort) {
            return
        }

        ## Initialize all paging state for this pipeline record. The process block runs once per
        ## input object, so nothing may be carried over from a previous record (or inherited
        ## from a same-named variable in the caller's scope).
        $ResultCount = 0
        $Results = @()
        $Result = $null
        $JSONResult = $null
        $SearchAfter = ''
        $EndOfPaging = $false
        $AllResultsReturned = $false

        ## Work on copies: the loop below adjusts these, and the values computed in the
        ## begin block must stay intact for the next pipeline record.
        [Int]$CurrentPageSize = $PageSize
        [Int]$CurrentQuerySize = $QuerySize

        ## Build the query from the filter parameters
        if ($PSCmdlet.ParameterSetName -eq 'Filters') {
            ## Filters that switch to the 'page.' prefixed field when -Strict is specified
            $StrictCapableFilters = @('Domain','IP','Country','Server','ASN','ASNName')
            ## Filters that are always sent under their plain field name
            $PlainFilters = @('Hash','Filename')

            $QueryFilters = @()
            foreach ($Name in @($PSBoundParameters.Keys)) {
                $IsStrictCapable = $Name -in $StrictCapableFilters
                if (!($IsStrictCapable) -and ($Name -notin $PlainFilters)) {
                    ## Not a filter (e.g. -Limit, -Strict, -Silent)
                    continue
                }

                $Field = $Name.ToLower()
                if ($Strict -and $IsStrictCapable) {
                    $Field = 'page.' + $Field
                }
                $QueryFilters += $Field + ':' + ([String]$PSBoundParameters[$Name]).ToLower()
            }

            $Query = $QueryFilters -join ' AND '
        }

        ## Encode the query so that characters such as '&', '#' and '+' are sent as part of
        ## the query rather than being interpreted as URL syntax.
        $EncodedQuery = [Uri]::EscapeDataString($Query)

        while ($true) {
            if ($ReturnAll) {
                if ($AllResultsReturned) {
                    break
                }
            } else {
                if (($ResultCount -ge $Limit) -or ($EndOfPaging)) {
                    break
                }

                ## Check if $Limit is less than the page size, and adjust page size accordingly.
                if ($Limit -lt $CurrentPageSize) {
                    $CurrentPageSize = $Limit
                }

                ## Check if Result Count + Page Size is Greater than requested results.
                ## If so, adjust the Query Size accordingly to ensure the correct number of results.
                if (($ResultCount + $CurrentPageSize) -gt $Limit) {
                    $CurrentQuerySize = $Limit - $ResultCount
                }
            }

            ## If $JSONResult is already populated from a previous iteration, append the search_after
            ## query parameter to enable paging. Rate Limiting is applied here too.
            if ($JSONResult) {
                $LastSort = ($JSONResult.results | Select-Object -Last 1).sort
                $SearchAfter = '&search_after=' + ($LastSort -join ',')

                Apply-RateLimiting -Headers $Result.Headers -RateLimitPause $RateLimitPause
            }

            try {
                $Result = Invoke-WebRequest -Method GET -Uri "https://urlscan.io/api/v1/search/?q=$($EncodedQuery)&size=$($CurrentQuerySize)$($SearchAfter)" -Headers $Headers
            } catch {
                ## Report the failure and return whatever has been collected so far
                Write-Error $_
                break
            }

            $JSONResult = $Result.Content | ConvertFrom-Json
            $ReturnedCount = $JSONResult.results.count

            ## A page that is smaller than the page size (and is not exactly the requested number of results)
            ## either means the API capped the page size, or that there are no more results.
            if (($ReturnedCount -lt $CurrentPageSize) -and ($ReturnedCount -ne $Limit)) {
                ## Hard set result limits by the API: treat these counts as a capped page size rather than the end of the results.
                if ($ReturnedCount -in @('100','1000','10000')) {
                    if (!($Silent)) { Write-Host "Requested Page Size: $($CurrentPageSize) but only $($ReturnedCount) results returned. Adjusting page size to: $($ReturnedCount)" -ForegroundColor Yellow }
                    $CurrentPageSize = $ReturnedCount
                    $CurrentQuerySize = $CurrentPageSize
                } else {
                    $EndOfPaging = $true
                    $AllResultsReturned = $true
                }
            }

            if ($JSONResult.results) {
                ## Append results to Array
                $Results += $JSONResult.results

                ## Write Visible Count
                if (!($ReturnAll)) {
                    if (($JSONResult.results.Count -lt $CurrentPageSize) -or ($JSONResult.results.Count -eq $Limit)) {
                        if (!($Silent)) { Write-Host -NoNewLine "`r($($Results.Count)/$($Results.Count)): URLScan.io Results Returned." -ForegroundColor Green }
                    } else {
                        if (!($Silent)) { Write-Host -NoNewLine "`r($($Results.Count)/$($Limit)): URLScan.io Results Returned.." -ForegroundColor Cyan }
                    }
                } else {
                    if ($JSONResult.results.Count -lt $CurrentPageSize) {
                        if (!($Silent)) { Write-Host -NoNewLine "`r($($Results.Count)/$($Results.Count)): All URLScan.io Results Returned." -ForegroundColor Green }
                    } else {
                        if (!($Silent)) { Write-Host -NoNewLine "`r($($Results.Count)/??): URLScan.io Results Returned.." -ForegroundColor Cyan }
                    }
                }

                ## Increment Result Count for loop tracking
                $ResultCount += $JSONResult.results.count
            } else {
                ## An empty page means there is nothing further to fetch
                if ($ReturnAll) {
                    $AllResultsReturned = $true
                }
                break
            }
        }

        ## Terminate the in-place progress line, unless output has been silenced
        if (!($Silent)) { Write-Host "" }
        return $Results
    }
}