Public/Resolve-Uri.ps1
<#
.SYNOPSIS
    Resolve-Uri resolves a URI and retrieves information such as file size, last modified date, and filename.

.DESCRIPTION
    The Resolve-Uri function resolves the given URI and retrieves information such as file size, last modified
    date, and filename. It sends a GET request to the URL but only reads the response headers (the body is never
    downloaded) and extracts the required information from them.

    Each user agent string is tried in order until the server answers with a success status code.

.PARAMETER Uri
    The URI to resolve. This parameter is mandatory and accepts pipeline input. URL is accepted as an alias.

.PARAMETER UserAgent
    The user agent string to use for the request. By default, it uses two user agent strings:
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
    and 'Googlebot/2.1 (+http://www.google.com/bot.html)'.
    You can specify multiple user agent strings as an array. An empty string (or an empty array) sends the
    request without a User-Agent header.

.PARAMETER Headers
    Additional headers to include in the request. Default is @{'accept' = '*/*'}, which is needed to trick some
    servers into serving a download, such as from FileZilla.

.OUTPUTS
    The function outputs a custom object with the following properties:
    - Uri: The original URL.
    - AbsoluteUri: The resolved URL after any redirections.
    - FileName: The extracted filename from the URL or headers.
    - FileSizeBytes: The file size in bytes (64-bit integer), or $null if the server did not report it.
    - FileSizeReadable: The file size in a human-readable format, or $null if unknown.
    - LastModified: The last modified date of the file, or $null if unknown.

.EXAMPLE
    Resolve-Uri -Uri 'https://example.com/file.txt'

    Resolves the URL 'https://example.com/file.txt' and retrieves the file information.

.EXAMPLE
    Resolve-Uri -Uri 'https://example.com/file.txt' -UserAgent 'My User Agent'

    Resolves the URL 'https://example.com/file.txt' using the specified user agent string.

.EXAMPLE
    Resolve-Uri -Uri 'https://example.com/file.txt' -Headers @{ 'Authorization' = 'Bearer token' }

    Resolves the URL 'https://example.com/file.txt' and includes the 'Authorization' header in the request.
#>
function Resolve-Uri {
    [CmdletBinding()]
    param(
        [Parameter(Position = 0, Mandatory = $true, ValueFromPipeline = $true)]
        [Alias('Url')]
        [ValidateNotNullOrEmpty()]
        [string]$Uri,

        [string[]]$UserAgent = @('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 'Googlebot/2.1 (+http://www.google.com/bot.html)'),

        [hashtable]$Headers = @{accept = '*/*' }
    )

    begin {
        # Required on Windows PowerShell only. Testing for "not Core" (instead of "is Desktop") also covers
        # older Windows PowerShell versions where $PSEdition is not defined at all.
        if ($PSEdition -ne 'Core') {
            Add-Type -AssemblyName System.Net.Http
            Add-Type -AssemblyName System.Web
        }

        # Enable TLS 1.2 in addition to whatever is pre-configured
        [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

        # Create one single client object for the pipeline
        $HttpClient = New-Object System.Net.Http.HttpClient
        if ($null -ne $Headers) {
            foreach ($Header in $Headers.GetEnumerator()) {
                $HttpClient.DefaultRequestHeaders.Add($Header.Key, $Header.Value)
            }
        }

        # Helper: turns a (possibly URL-encoded) path, URL or header value into a bare, safe file name.
        # UrlDecode converts characters like %20 back to spaces; GetFileName strips any directory part.
        $ConvertToSafeFileName = {
            param([string]$Value)

            $Name = [System.Web.HttpUtility]::UrlDecode($Value)

            # Path.GetFileName can throw on invalid path characters (e.g. < > |) on Windows PowerShell, so
            # blank them out first. They are all invalid file name characters too, so the result is the same
            # as replacing them afterwards.
            foreach ($Char in [System.IO.Path]::GetInvalidPathChars()) {
                $Name = $Name.Replace([string]$Char, ' ')
            }

            $Name = [System.IO.Path]::GetFileName($Name)

            # If any further invalid filename characters are found, convert them to spaces.
            foreach ($Char in [System.IO.Path]::GetInvalidFileNameChars()) {
                $Name = $Name.Replace([string]$Char, ' ')
            }

            $Name.Trim()
        }
    }

    process {
        # Reset per-item state. $ResponseHeader must be reset too, otherwise a failed request would silently
        # reuse the response of the previous pipeline item.
        $FileName = $AbsoluteUri = $FileSizeBytes = $FileSizeReadable = $LastModified = $null
        $ResponseHeader = $null
        $ContentDispositionHeader = $null

        Write-Verbose "$($MyInvocation.MyCommand): Requesting headers from URL '$Uri'"

        # An empty/null list still has to result in one request (sent without a User-Agent header).
        $UserAgentList = $UserAgent
        if ($null -eq $UserAgentList -or $UserAgentList.Count -eq 0) {
            $UserAgentList = @('')
        }

        foreach ($UserAgentString in $UserAgentList) {
            # Release the failed response of the previous attempt; with ResponseHeadersRead the underlying
            # connection stays reserved until the response is disposed.
            if ($null -ne $ResponseHeader) {
                $ResponseHeader.Dispose()
                $ResponseHeader = $null
            }

            $null = $HttpClient.DefaultRequestHeaders.Remove('User-Agent')
            if ($UserAgentString) {
                Write-Verbose "$($MyInvocation.MyCommand): Using UserAgent '$UserAgentString'"
                # Skip format validation so arbitrary user agent strings are accepted as given.
                $null = $HttpClient.DefaultRequestHeaders.TryAddWithoutValidation('User-Agent', $UserAgentString)
            }

            # This sends a GET request but only retrieves the headers.
            # GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException.
            $ResponseHeader = $HttpClient.GetAsync($Uri, [System.Net.Http.HttpCompletionOption]::ResponseHeadersRead).GetAwaiter().GetResult()

            # Exit the foreach if success
            if ($ResponseHeader.IsSuccessStatusCode) {
                break
            }
        }

        if ($null -ne $ResponseHeader) {
            # Final URL after any redirections
            $AbsoluteUri = $ResponseHeader.RequestMessage.RequestUri.AbsoluteUri
        }

        if ($null -ne $ResponseHeader -and $ResponseHeader.IsSuccessStatusCode) {
            Write-Verbose "$($MyInvocation.MyCommand): Successfully retrieved headers"

            if ($AbsoluteUri -ne $Uri) {
                Write-Verbose "$($MyInvocation.MyCommand): URL '$Uri' redirects to '$AbsoluteUri'"
            }

            # File size from the "Content-Length" header. A 64-bit integer is required: files of 2 GiB or more
            # do not fit into [int].
            try {
                $FileSizeBytes = [long]$ResponseHeader.Content.Headers.GetValues('Content-Length')[0]
                $FileSizeReadable = if ($FileSizeBytes -gt 1TB) {
                    '{0:n2} TB' -f ($FileSizeBytes / 1TB)
                } elseif ($FileSizeBytes -gt 1GB) {
                    '{0:n2} GB' -f ($FileSizeBytes / 1GB)
                } elseif ($FileSizeBytes -gt 1MB) {
                    '{0:n2} MB' -f ($FileSizeBytes / 1MB)
                } elseif ($FileSizeBytes -gt 1KB) {
                    '{0:n2} KB' -f ($FileSizeBytes / 1KB)
                } else {
                    '{0} B' -f $FileSizeBytes
                }
                Write-Verbose "$($MyInvocation.MyCommand): File size: $FileSizeBytes bytes ($FileSizeReadable)"
            } catch {
                Write-Verbose "$($MyInvocation.MyCommand): Unable to determine file size"
            }

            # Try to get the last modified date from the "Last-Modified" header, use error handling in case
            # the header is missing or the string is in an invalid format
            try {
                $LastModified = [DateTime]::ParseExact($ResponseHeader.Content.Headers.GetValues('Last-Modified')[0], 'r', [System.Globalization.CultureInfo]::InvariantCulture)
                Write-Verbose "$($MyInvocation.MyCommand): Last modified: $($LastModified.ToString())"
            } catch {
                Write-Verbose "$($MyInvocation.MyCommand): Last-Modified header not found"
            }

            # Get the file name from the "Content-Disposition" header if available
            try {
                $ContentDispositionHeader = $ResponseHeader.Content.Headers.GetValues('Content-Disposition')[0]
                Write-Verbose "$($MyInvocation.MyCommand): Content-Disposition header found: $ContentDispositionHeader"
            } catch {
                Write-Verbose "$($MyInvocation.MyCommand): Content-Disposition header not found"
            }

            if ($ContentDispositionHeader) {
                # Matches filename="x", filename=x and the extended filename*=UTF-8'lang'x notation
                $ContentDispositionRegEx = '^.*filename\*?\s*=\s*"?(?:UTF-8|iso-8859-1)?(?:''[^'']*?'')?([^";]+)'
                if ($ContentDispositionHeader -match $ContentDispositionRegEx) {
                    $HeaderFileName = $Matches[1]
                    $FileName = & $ConvertToSafeFileName $HeaderFileName
                    Write-Verbose "$($MyInvocation.MyCommand): Extracted filename '$FileName' from Content-Disposition header"
                } else {
                    Write-Verbose "$($MyInvocation.MyCommand): Failed to extract filename from Content-Disposition header"
                }
            }

            if ([string]::IsNullOrEmpty($FileName)) {
                # If failed to parse Content-Disposition header or if it's not available, extract the file name
                # from the absolute URL to capture any redirections. The URL is split with ? to ensure we can
                # strip off any API parameters.
                $AbsoluteUriPath = $AbsoluteUri.Split('?')[0]
                $FileName = & $ConvertToSafeFileName $AbsoluteUriPath
                Write-Verbose "$($MyInvocation.MyCommand): Extracted filename '$FileName' from absolute URL '$AbsoluteUri'"
            }
        } else {
            # Capture what the error message needs, then release the response before throwing.
            $StatusCode = 0
            $ReasonPhrase = $null
            if ($null -ne $ResponseHeader) {
                $StatusCode = [int]$ResponseHeader.StatusCode
                $ReasonPhrase = $ResponseHeader.ReasonPhrase
                $ResponseHeader.Dispose()
                $ResponseHeader = $null
            }
            throw "$($MyInvocation.MyCommand): Failed to retrieve headers from $($Uri): $($StatusCode): $($ReasonPhrase)"
        }

        if ([string]::IsNullOrEmpty($FileName)) {
            # If still no filename set, extract the file name from the original URL.
            # The URL is split with ? to ensure we can strip off any API parameters.
            $OriginalUriPath = $Uri.Split('?')[0]
            $FileName = & $ConvertToSafeFileName $OriginalUriPath
            Write-Verbose "$($MyInvocation.MyCommand): Extracted filename '$FileName' from original URL '$Uri'"
        }

        # Everything needed has been copied out of the response; dispose it before emitting output so the
        # connection is released even if a downstream command stops the pipeline.
        if ($null -ne $ResponseHeader) {
            $ResponseHeader.Dispose()
            $ResponseHeader = $null
        }

        [PSCustomObject]@{
            Uri              = $Uri
            AbsoluteUri      = $AbsoluteUri
            FileName         = $FileName
            FileSizeBytes    = $FileSizeBytes
            FileSizeReadable = $FileSizeReadable
            LastModified     = $LastModified
        }
    }

    end {
        if ($null -ne $HttpClient) {
            $HttpClient.Dispose()
        }
    }
}