Files.ps1
|
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

function Resolve-ScriptPath
{
    <#
    .SYNOPSIS
        Resolve path that is local to the script

    .DESCRIPTION
        During script development it is useful to copy-paste function code and
        call scripts in the local folder. But for reusability in the script files
        it is best to combine paths with $PsScriptRoot variable that is available
        only when called from within a script.

        This function brings the best of both worlds together. Resolving paths
        with this function allows to:
        - Copy-paste code from editor. Paths would be resolved relative to
          current folder.
        - Use $PsScriptRoot when script is being called. Path would be resolved
          relative to script root folder.

    .PARAMETER Path
        Path to be resolved.

    .EXAMPLE
        Resolve-ScriptPath "Utils.ps1"

        When executed in console on copy-paste it would resolve to '.\Utils.ps1',
        but when executed from a script that somebody calls it would resolve to
        'Drive:\Path\To\Script\Folder\Utils.ps1'
    #>

    param
    (
        [Parameter(Mandatory = $true)]
        [string] $Path
    )

    # $myInvocation.PSScriptRoot is empty when the caller is not a script file
    # (e.g. code pasted into the console); fall back to the current folder then.
    $location = if( $myInvocation.PSScriptRoot ) { $myInvocation.PSScriptRoot } else { "." }
    Join-Path $location $Path
}

function Get-FileEncoding
{
    <#
    .SYNOPSIS
        Gets file encoding

    .DESCRIPTION
        Detects the encoding of a file by comparing its first bytes with the
        preambles (byte order marks) of the known encodings.

        Useful if you want to update large volume of files and don't want to have
        regressions coming from encoding changes as a side-effect.

    .PARAMETER Path
        The path to the file you need get encoding from.

    .OUTPUTS
        System.Text.Encoding. The first known encoding whose preamble matches the
        beginning of the file.

    .EXAMPLE
        Get-FileEncoding main.cpp

        Get encoding that main.cpp file uses.

    .LINK
        http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html

    .NOTES
        Default encoding behaves as ASCII with support of currently used windows code page
    #>

    param
    (
        [Parameter(Mandatory = $true)]
        [string] $Path
    )

    # Returns $true when $filePreamble starts with the preamble of $encoding.
    # An encoding with an empty preamble matches any content.
    function Test-Preamble( $encoding, [byte[]] $filePreamble )
    {
        [byte[]] $preamble = $encoding.GetPreamble()
        if( $filePreamble.Count -lt $preamble.Count )
        {
            return $false
        }

        for( $i = 0; $i -lt $preamble.Count; $i += 1 )
        {
            if( $filePreamble[$i] -ne $preamble[$i] )
            {
                return $false
            }
        }

        return $true
    }

    # Order matters: the UTF32 preamble (FF FE 00 00) starts with the UTF16
    # preamble (FF FE), so UTF32 has to be tested before Unicode.
    $knownEncodings =
    @(
        [Text.Encoding]::BigEndianUnicode,
        [Text.Encoding]::UTF32,
        [Text.Encoding]::UTF8,
        [Text.Encoding]::Unicode,   # that's UTF16
        [Text.Encoding]::Default    # must come last
    )

    # Only the first 4 bytes are needed; that is the longest known preamble.
    # '-Encoding Byte' was removed in PowerShell 6 in favor of '-AsByteStream'.
    $readArguments = @{ Path = $Path; ReadCount = 4; TotalCount = 4 }
    if( $PSVersionTable.PSVersion.Major -ge 6 )
    {
        $readArguments.AsByteStream = $true
    }
    else
    {
        $readArguments.Encoding = "Byte"
    }

    [byte[]] $byte = Get-Content @readArguments
    if( $null -eq $byte )
    {
        # Empty (or unreadable) file: compare against an empty preamble
        $byte = @()
    }

    foreach( $encoding in $knownEncodings )
    {
        if( Test-Preamble $encoding $byte )
        {
            return $encoding
        }
    }

    # Usually Default encoding preamble is empty and we return it, but in case
    # that's not true we assume file without preamble to be UTF7 encoded
    [Text.Encoding]::UTF7
}

function Test-BinaryContent
{
    <#
    .SYNOPSIS
        Heuristically checks whether file content looks binary

    .DESCRIPTION
        Private helper of ConvertTo-LineEnding and ConvertTo-Utf8. Content is
        considered binary when a zero byte is found within the first
        SampleSize bytes. UTF-16 and UTF-32 text legitimately contains zero
        bytes, so content whose detected encoding is one of those is never
        reported as binary.

    .PARAMETER Bytes
        Raw content of the file.

    .PARAMETER Encoding
        Encoding detected for the file (see Get-FileEncoding).

    .PARAMETER SampleSize
        Maximum number of leading bytes to inspect.

    .OUTPUTS
        System.Boolean
    #>

    param
    (
        [byte[]] $Bytes,
        [Text.Encoding] $Encoding,
        [int] $SampleSize = 8192
    )

    # Code pages of UTF-16 LE, UTF-16 BE, UTF-32 LE and UTF-32 BE
    $wideCodePages = 1200, 1201, 12000, 12001
    if( $Encoding -and ($wideCodePages -contains $Encoding.CodePage) )
    {
        return $false
    }

    $limit = [Math]::Min($Bytes.Length, $SampleSize)
    for( $i = 0; $i -lt $limit; $i++ )
    {
        if( $Bytes[$i] -eq 0 )
        {
            return $true
        }
    }

    return $false
}

function ConvertTo-LineEnding
{
    <#
    .SYNOPSIS
        Convert line endings of a file to LF or CRLF

    .DESCRIPTION
        Reads a text file, converts all line endings to the specified format,
        and writes it back using the encoding the file already had. Validates
        that the file exists, is a file system file, is not empty and does not
        look like a binary file before making changes.

    .PARAMETER Path
        The path to the file whose line endings should be converted.
        Accepts pipeline input and wildcards.

    .PARAMETER LF
        Convert line endings to Unix-style LF (\n).

    .PARAMETER CRLF
        Convert line endings to Windows-style CRLF (\r\n).

    .EXAMPLE
        ConvertTo-LineEnding -Path "script.ps1" -LF

        Converts script.ps1 line endings to LF.

    .EXAMPLE
        Get-ChildItem *.ps1 | ConvertTo-LineEnding -CRLF

        Converts all .ps1 files in the current directory to CRLF line endings.

    .EXAMPLE
        ConvertTo-LineEnding -Path "README.md" -CRLF

        Converts README.md line endings to CRLF.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param
    (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [Alias("FullName")]
        [string[]] $Path,

        [Parameter(Mandatory = $true, ParameterSetName = "LF")]
        [switch] $LF,

        [Parameter(Mandatory = $true, ParameterSetName = "CRLF")]
        [switch] $CRLF
    )

    process
    {
        foreach( $filePath in $Path )
        {
            $resolvedPaths = Resolve-Path $filePath -ErrorAction SilentlyContinue
            if( -not $resolvedPaths )
            {
                Write-Error "File not found: $filePath"
                continue
            }

            foreach( $resolved in $resolvedPaths )
            {
                # ProviderPath is the native path that .NET file APIs understand;
                # Path may be provider-qualified (UNC) or use a PowerShell-only drive.
                $file = $resolved.ProviderPath

                $isFile = ($resolved.Provider.Name -eq "FileSystem") -and
                          (Test-Path -LiteralPath $resolved.Path -PathType Leaf)
                if( -not $isFile )
                {
                    Write-Error "Not a file: $file"
                    continue
                }

                $bytes = [System.IO.File]::ReadAllBytes($file)
                if( $bytes.Length -eq 0 )
                {
                    Write-Warning "File is empty, skipping: $file"
                    continue
                }

                # Detect the encoding first: UTF-16/UTF-32 text contains zero bytes
                # and must not be mistaken for binary content.
                $encoding = Get-FileEncoding -Path $file
                if( Test-BinaryContent -Bytes $bytes -Encoding $encoding )
                {
                    Write-Error "File appears to be binary and cannot have line endings converted: $file"
                    continue
                }

                if( $PSCmdlet.ShouldProcess($file, "Convert line endings to $($PSCmdlet.ParameterSetName)") )
                {
                    # Read with the file's current encoding to preserve it
                    $content = [System.IO.File]::ReadAllText($file, $encoding)

                    # Normalize all line endings (CRLF and lone CR) to LF first,
                    # then convert to target
                    $content = $content -replace "`r`n?", "`n"

                    if( $CRLF )
                    {
                        $content = $content -replace "`n", "`r`n"
                    }

                    [System.IO.File]::WriteAllText($file, $content, $encoding)
                    Write-Output "Converted line endings to $($PSCmdlet.ParameterSetName): $file"
                }
            }
        }
    }
}

function ConvertTo-Utf8
{
    <#
    .SYNOPSIS
        Convert a file's encoding to UTF-8

    .DESCRIPTION
        Reads a file using its current encoding, then writes it back as UTF-8.
        Skips files that are already UTF-8 encoded with the requested BOM
        setting. Validates that the file exists, is a file system file, is not
        empty and does not look like a binary file before making changes.

        A file without a byte order mark whose content is valid UTF-8 is
        treated as UTF-8 rather than as text in the system code page, so
        it is not re-encoded twice.

    .PARAMETER Path
        The path to the file whose encoding should be converted to UTF-8.
        Accepts pipeline input and wildcards.

    .PARAMETER NoBom
        Write UTF-8 without a Byte Order Mark (BOM). Default includes BOM.

    .EXAMPLE
        ConvertTo-Utf8 -Path "script.ps1"

        Converts script.ps1 to UTF-8 with BOM.

    .EXAMPLE
        ConvertTo-Utf8 -Path "README.md" -NoBom

        Converts README.md to UTF-8 without BOM.

    .EXAMPLE
        Get-ChildItem *.txt | ConvertTo-Utf8

        Converts all .txt files in the current directory to UTF-8.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param
    (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [Alias("FullName")]
        [string[]] $Path,

        [switch] $NoBom
    )

    process
    {
        foreach( $filePath in $Path )
        {
            $resolvedPaths = Resolve-Path $filePath -ErrorAction SilentlyContinue
            if( -not $resolvedPaths )
            {
                Write-Error "File not found: $filePath"
                continue
            }

            foreach( $resolved in $resolvedPaths )
            {
                # ProviderPath is the native path that .NET file APIs understand;
                # Path may be provider-qualified (UNC) or use a PowerShell-only drive.
                $file = $resolved.ProviderPath

                $isFile = ($resolved.Provider.Name -eq "FileSystem") -and
                          (Test-Path -LiteralPath $resolved.Path -PathType Leaf)
                if( -not $isFile )
                {
                    Write-Error "Not a file: $file"
                    continue
                }

                $bytes = [System.IO.File]::ReadAllBytes($file)
                if( $bytes.Length -eq 0 )
                {
                    Write-Warning "File is empty, skipping: $file"
                    continue
                }

                # Detect the encoding first: UTF-16/UTF-32 text contains zero bytes
                # and must not be mistaken for binary content.
                $currentEncoding = Get-FileEncoding -Path $file
                if( Test-BinaryContent -Bytes $bytes -Encoding $currentEncoding )
                {
                    Write-Error "File appears to be binary and cannot be re-encoded: $file"
                    continue
                }

                # No BOM was found and the fallback encoding is not UTF-8 (Windows
                # PowerShell falls back to the ANSI code page). If the bytes are
                # valid UTF-8, decoding them as ANSI would corrupt every non-ASCII
                # character, so prefer BOM-less UTF-8 in that case.
                $detectedByBom = $currentEncoding.GetPreamble().Length -gt 0
                if( -not $detectedByBom -and $currentEncoding.CodePage -ne 65001 )
                {
                    try
                    {
                        # Strict decoder: throws on any invalid UTF-8 byte sequence
                        $strictUtf8 = [System.Text.UTF8Encoding]::new($false, $true)
                        [void] $strictUtf8.GetCharCount($bytes)
                        $currentEncoding = [System.Text.UTF8Encoding]::new($false)
                    }
                    catch
                    {
                        # Not valid UTF-8: keep the encoding reported by Get-FileEncoding
                    }
                }

                $targetEncoding = if( $NoBom )
                {
                    [System.Text.UTF8Encoding]::new($false)
                }
                else
                {
                    [System.Text.Encoding]::UTF8
                }

                # Check if already UTF-8
                if( $currentEncoding.CodePage -eq 65001 )
                {
                    # If already UTF-8, check BOM preference
                    $hasBomNow = $bytes.Length -ge 3 -and $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF
                    $wantsBom = -not $NoBom

                    if( $hasBomNow -eq $wantsBom )
                    {
                        Write-Output "Already UTF-8, skipping: $file"
                        continue
                    }
                }

                if( $PSCmdlet.ShouldProcess($file, "Convert encoding from $($currentEncoding.EncodingName) to UTF-8$(if($NoBom){' (no BOM)'})") )
                {
                    $content = [System.IO.File]::ReadAllText($file, $currentEncoding)
                    [System.IO.File]::WriteAllText($file, $content, $targetEncoding)
                    Write-Output "Converted to UTF-8$(if($NoBom){' (no BOM)'}): $file"
                }
            }
        }
    }
}