Functions/Get-TextFileEncoding.ps1
<#
.SYNOPSIS
    This function retrieves the encoding of a given text file.

.DESCRIPTION
    This function retrieves the encoding of a given text file by comparing the
    first bytes of the file with the byte order marks (BOMs) of UTF-8,
    UTF-16 (little/big endian) and UTF-32 (little/big endian). A file that does
    not start with one of these BOMs is reported as UTF-8.

    Adapted from https://resources.oreilly.com/examples/0636920024132/blob/master/Get-FileEncoding.ps1

.OUTPUTS
    System.String. One of "UTF8", "UTF32", "BigEndianUTF32", "Unicode" or
    "BigEndianUnicode". Nothing is returned when the file extension is not
    listed in $PLAIN_TEXT_FILE_EXTENSIONS.
#>
function Get-TextFileEncoding {
    [CmdletBinding(PositionalBinding=$true)]
    [OutputType([String])]
    param (
        # The path to the file on the local machine.
        [Parameter(Mandatory=$true)]
        [ValidateNotNullOrEmpty()]
        [String]$path,

        # Select the stream where the error messages will be directed.
        [Parameter(Mandatory=$false)]
        [ValidateSet("Information", "Warning", "Error", "None")]
        [String]$outputStream = "Error"
    )

    # Verify that the file has one of the supported text file extensions
    $fileExtension = [System.IO.Path]::GetExtension($path)
    if ($fileExtension -notIn $PLAIN_TEXT_FILE_EXTENSIONS) {
        Write-OutputMessage "The file '$($path)' does not match the list of known text file extensions '$($PLAIN_TEXT_FILE_EXTENSIONS -join ',')'." -OutputStream $outputStream -ReturnMessage:$false
        return
    }

    # Known byte order marks, paired with the encoding name returned to the caller.
    # The literal bytes are used instead of looking encodings up by name, because
    # the names reported by the runtime are not the same on every .NET flavour.
    # Order matters: the UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE BOM
    # (FF FE), so the longer marks must be tested before the shorter ones.
    $byteOrderMarks = @(
        @{ Name = "UTF32";            Bytes = [byte[]](0xFF, 0xFE, 0x00, 0x00) },
        @{ Name = "BigEndianUTF32";   Bytes = [byte[]](0x00, 0x00, 0xFE, 0xFF) },
        @{ Name = "UTF8";             Bytes = [byte[]](0xEF, 0xBB, 0xBF) },
        @{ Name = "Unicode";          Bytes = [byte[]](0xFF, 0xFE) },
        @{ Name = "BigEndianUnicode"; Bytes = [byte[]](0xFE, 0xFF) }
    )

    # Read the first four bytes (the longest BOM) once.
    # PowerShell 6+ replaced '-Encoding Byte' with '-AsByteStream'.
    $readArguments = @{ Path = $path; TotalCount = 4 }
    if ($PSVersionTable.PSVersion.Major -ge 6) {
        $readArguments["AsByteStream"] = $true
    }
    else {
        $readArguments["Encoding"] = "Byte"
    }

    # @() guarantees an array even when the file is empty or holds a single byte
    $headerBytes = @(Get-Content @readArguments)

    foreach ($byteOrderMark in $byteOrderMarks) {
        $markLength = $byteOrderMark.Bytes.Count

        # A file shorter than the mark cannot start with it
        if ($headerBytes.Count -lt $markLength) {
            continue
        }

        $candidate = $headerBytes[0..($markLength - 1)] -join '-'
        if ($candidate -eq ($byteOrderMark.Bytes -join '-')) {
            return $byteOrderMark.Name
        }
    }

    # No BOM found: assume UTF-8
    return "UTF8"
}