Files.ps1

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

function Resolve-ScriptPath
{
    <#
    .SYNOPSIS
        Build a path relative to the calling script's folder

    .DESCRIPTION
        Code pasted into an interactive console has no script root, while code
        running from a script file should address neighbouring files through
        the script's own folder rather than through the current location.

        This function covers both situations with a single call:
        - When the invocation carries a script root (the caller is a script),
          the path is joined to that script root folder.
        - Otherwise (e.g. code pasted into the console) the path is joined to
          '.', so it stays relative to the current folder.

    .PARAMETER Path
        Path to be resolved.

    .EXAMPLE
        Resolve-ScriptPath "Utils.ps1"

        Pasted into a console this yields '.\Utils.ps1'; called from a script
        it yields 'Drive:\Path\To\Script\Folder\Utils.ps1'.
    #>

    param
    (
        [Parameter(Mandatory = $true)]
        [string] $Path
    )

    # Start from the current folder and switch to the script root only when
    # the invocation actually provides one.
    $baseFolder = "."
    if( $myInvocation.PSScriptRoot )
    {
        $baseFolder = $myInvocation.PSScriptRoot
    }

    Join-Path -Path $baseFolder -ChildPath $Path
}

function Get-FileEncoding
{
    <#
    .SYNOPSIS
        Gets file encoding

    .DESCRIPTION
        Useful if you want to update a large volume of files and don't want
        to have regressions coming from encoding changes as a side-effect.

        The encoding is detected solely from the byte order mark (preamble)
        found in the first four bytes of the file. A file without a byte
        order mark - including UTF-8 saved without BOM - is reported as the
        Default encoding.

    .PARAMETER Path
        The path to the file you need to get the encoding from.

    .OUTPUTS
        System.Text.Encoding

    .EXAMPLE
        Get-FileEncoding main.cpp

        Get the encoding that the main.cpp file uses.

    .LINK
        http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html

    .NOTES
        Default encoding behaves as ASCII with support of currently used
        windows code page
    #>

    param
    (
        [Parameter(Mandatory = $true)]
        [string] $Path
    )

    # Returns $true when $filePreamble starts with the preamble of $encoding.
    function Test-Preamble( $encoding, [byte[]] $filePreamble )
    {
        [byte[]] $preamble = $encoding.GetPreamble()

        # A file shorter than the preamble cannot carry it
        if( $filePreamble.Count -lt $preamble.Count )
        {
            return $false
        }

        for( $i = 0; $i -lt $preamble.Count; $i += 1 )
        {
            if( $filePreamble[$i] -ne $preamble[$i] )
            {
                return $false
            }
        }

        return $true
    }

    # Order matters: the UTF32 preamble (FF FE 00 00) starts with the UTF16
    # preamble (FF FE), so UTF32 has to be tested before Unicode.
    $knownEncodings = @(
        [Text.Encoding]::BigEndianUnicode,
        [Text.Encoding]::UTF32,
        [Text.Encoding]::UTF8,
        [Text.Encoding]::Unicode, # that's UTF16
        [Text.Encoding]::Default  # must come last
    )

    # Windows PowerShell reads raw bytes with '-Encoding Byte'; PowerShell 6+
    # removed that value and uses the -AsByteStream switch instead.
    $readArguments = @{
        Path       = $Path
        ReadCount  = 4
        TotalCount = 4
    }

    if( $PSVersionTable.PSVersion.Major -ge 6 )
    {
        $readArguments.AsByteStream = $true
    }
    else
    {
        $readArguments.Encoding = "Byte"
    }

    [byte[]] $byte = Get-Content @readArguments

    foreach( $encoding in $knownEncodings )
    {
        if( Test-Preamble $encoding $byte )
        {
            return $encoding
        }
    }

    # Usually Default encoding preamble is empty and we return it, but in case
    # that's not true we assume file without preamble to be UTF7 encoded
    [Text.Encoding]::UTF7
}

function ConvertTo-LineEnding
{
    <#
    .SYNOPSIS
        Convert line endings of a file to LF or CRLF

    .DESCRIPTION
        Reads a text file, converts all line endings to the specified format,
        and writes it back. Validates that the file exists, is a file rather
        than a folder, and does not look like a binary file before making
        changes.

        Files starting with a byte order mark are read and written with the
        encoding reported by Get-FileEncoding. Files without a byte order mark
        are processed byte-transparently, so every byte other than CR and LF
        is written back unchanged whatever the actual encoding is.

    .PARAMETER Path
        The path to the file whose line endings should be converted.
        Accepts pipeline input and wildcards.

    .PARAMETER LF
        Convert line endings to Unix-style LF (\n).

    .PARAMETER CRLF
        Convert line endings to Windows-style CRLF (\r\n).

    .EXAMPLE
        ConvertTo-LineEnding -Path "script.ps1" -LF

        Converts script.ps1 line endings to LF.

    .EXAMPLE
        Get-ChildItem *.ps1 | ConvertTo-LineEnding -CRLF

        Converts all .ps1 files in the current directory to CRLF line endings.

    .EXAMPLE
        ConvertTo-LineEnding -Path "README.md" -CRLF

        Converts README.md line endings to CRLF.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param
    (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [Alias("FullName")]
        [string[]] $Path,

        [Parameter(Mandatory = $true, ParameterSetName = "LF")]
        [switch] $LF,

        [Parameter(Mandatory = $true, ParameterSetName = "CRLF")]
        [switch] $CRLF
    )

    process
    {
        foreach( $filePath in $Path )
        {
            $resolvedPaths = Resolve-Path $filePath -ErrorAction SilentlyContinue
            if( -not $resolvedPaths )
            {
                Write-Error "File not found: $filePath"
                continue
            }

            foreach( $resolved in $resolvedPaths )
            {
                # The .NET file APIs used below know nothing about PowerShell
                # drives or provider-qualified paths, so use the provider path.
                $file = $resolved.ProviderPath

                if( -not (Test-Path $file -PathType Leaf) )
                {
                    Write-Error "Not a file: $file"
                    continue
                }

                $bytes = [System.IO.File]::ReadAllBytes($file)

                # FF FE starts UTF-16 LE and UTF-32 LE files, FE FF starts UTF-16 BE files
                $hasWideBom = $bytes.Length -ge 2 -and (
                    ( $bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE ) -or
                    ( $bytes[0] -eq 0xFE -and $bytes[1] -eq 0xFF ) )

                $hasUtf8Bom = $bytes.Length -ge 3 -and $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF

                # Check for binary content by looking for null bytes in the first 8 KB.
                # UTF-16 and UTF-32 text legitimately contains null bytes, so the
                # check is skipped for files that start with one of their BOMs.
                $hasBinaryContent = $false
                if( -not $hasWideBom )
                {
                    $sampleSize = [Math]::Min($bytes.Length, 8192)
                    for( $i = 0; $i -lt $sampleSize; $i++ )
                    {
                        if( $bytes[$i] -eq 0 )
                        {
                            $hasBinaryContent = $true
                            break
                        }
                    }
                }

                if( $hasBinaryContent )
                {
                    Write-Error "File appears to be binary and cannot have line endings converted: $file"
                    continue
                }

                if( $bytes.Length -eq 0 )
                {
                    Write-Warning "File is empty, skipping: $file"
                    continue
                }

                if( $PSCmdlet.ShouldProcess($file, "Convert line endings to $($PSCmdlet.ParameterSetName)") )
                {
                    if( $hasWideBom -or $hasUtf8Bom )
                    {
                        # The byte order mark identifies the encoding; keep it
                        $encoding = Get-FileEncoding -Path $file
                    }
                    else
                    {
                        # Without a byte order mark the real encoding is unknown.
                        # ISO-8859-1 maps every byte to exactly one character and
                        # back, so only CR and LF bytes are changed by the round
                        # trip and all other content is preserved as-is.
                        $encoding = [System.Text.Encoding]::GetEncoding(28591)
                    }

                    $content = [System.IO.File]::ReadAllText($file, $encoding)

                    # Normalize all line endings to LF first, then convert to target
                    $content = $content -replace "`r`n", "`n"
                    $content = $content -replace "`r", "`n"

                    if( $CRLF )
                    {
                        $content = $content -replace "`n", "`r`n"
                    }

                    [System.IO.File]::WriteAllText($file, $content, $encoding)
                    Write-Output "Converted line endings to $($PSCmdlet.ParameterSetName): $file"
                }
            }
        }
    }
}

function ConvertTo-Utf8
{
    <#
    .SYNOPSIS
        Convert a file's encoding to UTF-8

    .DESCRIPTION
        Reads a file using its current encoding, then writes it back as UTF-8.
        Skips files that are already UTF-8 encoded with the requested byte
        order mark setting. Validates that the file exists and does not look
        like a binary file before making changes.

        The current encoding is taken from Get-FileEncoding, which detects
        encodings by their byte order mark. A file without a byte order mark
        whose content is valid UTF-8 is treated as UTF-8 without BOM.

    .PARAMETER Path
        The path to the file whose encoding should be converted to UTF-8.
        Accepts pipeline input and wildcards.

    .PARAMETER NoBom
        Write UTF-8 without a Byte Order Mark (BOM). Default includes BOM.

    .EXAMPLE
        ConvertTo-Utf8 -Path "script.ps1"

        Converts script.ps1 to UTF-8 with BOM.

    .EXAMPLE
        ConvertTo-Utf8 -Path "README.md" -NoBom

        Converts README.md to UTF-8 without BOM.

    .EXAMPLE
        Get-ChildItem *.txt | ConvertTo-Utf8

        Converts all .txt files in the current directory to UTF-8.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param
    (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [Alias("FullName")]
        [string[]] $Path,

        [switch] $NoBom
    )

    process
    {
        foreach( $filePath in $Path )
        {
            $resolvedPaths = Resolve-Path $filePath -ErrorAction SilentlyContinue
            if( -not $resolvedPaths )
            {
                Write-Error "File not found: $filePath"
                continue
            }

            foreach( $resolved in $resolvedPaths )
            {
                # The .NET file APIs used below know nothing about PowerShell
                # drives or provider-qualified paths, so use the provider path.
                $file = $resolved.ProviderPath

                if( -not (Test-Path $file -PathType Leaf) )
                {
                    Write-Error "Not a file: $file"
                    continue
                }

                $bytes = [System.IO.File]::ReadAllBytes($file)

                # FF FE starts UTF-16 LE and UTF-32 LE files, FE FF starts UTF-16 BE files
                $hasWideBom = $bytes.Length -ge 2 -and (
                    ( $bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE ) -or
                    ( $bytes[0] -eq 0xFE -and $bytes[1] -eq 0xFF ) )

                # Check for binary content by looking for null bytes in the first 8 KB.
                # UTF-16 and UTF-32 text legitimately contains null bytes, so the
                # check is skipped for files that start with one of their BOMs;
                # otherwise those files could never be converted.
                $hasBinaryContent = $false
                if( -not $hasWideBom )
                {
                    $sampleSize = [Math]::Min($bytes.Length, 8192)
                    for( $i = 0; $i -lt $sampleSize; $i++ )
                    {
                        if( $bytes[$i] -eq 0 )
                        {
                            $hasBinaryContent = $true
                            break
                        }
                    }
                }

                if( $hasBinaryContent )
                {
                    Write-Error "File appears to be binary and cannot be re-encoded: $file"
                    continue
                }

                if( $bytes.Length -eq 0 )
                {
                    Write-Warning "File is empty, skipping: $file"
                    continue
                }

                $currentEncoding = Get-FileEncoding -Path $file
                $targetEncoding = if( $NoBom ) { [System.Text.UTF8Encoding]::new($false) } else { [System.Text.Encoding]::UTF8 }

                $hasBomNow = $bytes.Length -ge 3 -and $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF

                # A file without any byte order mark may still be UTF-8. Decoding
                # such a file with a legacy code page and re-encoding it would
                # garble every non-ASCII character, so validate the bytes with a
                # strict UTF-8 decoder first.
                if( (-not $hasBomNow) -and (-not $hasWideBom) )
                {
                    try
                    {
                        # Second argument makes the decoder throw on invalid bytes
                        $strictUtf8 = [System.Text.UTF8Encoding]::new($false, $true)
                        $null = $strictUtf8.GetCharCount($bytes)
                        $currentEncoding = [System.Text.UTF8Encoding]::new($false)
                    }
                    catch [System.Management.Automation.MethodInvocationException]
                    {
                        # Not valid UTF-8: keep the encoding reported by Get-FileEncoding
                    }
                }

                # Check if already UTF-8
                if( $currentEncoding.CodePage -eq 65001 )
                {
                    # If already UTF-8, check BOM preference
                    $wantsBom = -not $NoBom

                    if( $hasBomNow -eq $wantsBom )
                    {
                        Write-Output "Already UTF-8, skipping: $file"
                        continue
                    }
                }

                if( $PSCmdlet.ShouldProcess($file, "Convert encoding from $($currentEncoding.EncodingName) to UTF-8$(if($NoBom){' (no BOM)'})") )
                {
                    $content = [System.IO.File]::ReadAllText($file, $currentEncoding)
                    [System.IO.File]::WriteAllText($file, $content, $targetEncoding)
                    Write-Output "Converted to UTF-8$(if($NoBom){' (no BOM)'}): $file"
                }
            }
        }
    }
}