test.ps1
# Test script for the UniShell module.
#
# Defines a handful of tiny assertion helpers, imports UniShell.psd1 from the
# directory containing this script, and then runs each test case. Each test
# prints a green "[name] passed" or a red "[name] failed - <error>" line.

Set-StrictMode -Version 2
$errorActionPreference = 'Stop'

$scriptDir = Split-Path $psCommandPath

# Runs a single named test case.
#   $name - label printed in the pass/fail line.
#   $sb   - script block holding the test body; any terminating error thrown
#           inside it marks the test as failed.
# A failure is reported and swallowed so the remaining tests still run.
function test {
    param(
        [string] $name,
        [scriptblock] $sb
    )

    try {
        & $sb
    }
    catch {
        Write-Host -ForegroundColor Red "[$name] failed - $_"
        return
    }

    Write-Host -ForegroundColor Green "[$name] passed"
}

# "Check equal": throws unless $expected -eq $actual.
# Note that PowerShell's -eq compares strings case-insensitively.
function ce {
    param(
        $expected,
        $actual
    )

    if (-not ($expected -eq $actual)) {
        throw "Items were not equal. Expected [$expected] Actual [$actual]"
    }
}

# "Check match": throws unless $actual matches the regex $pattern.
function cm {
    param(
        $pattern,
        $actual
    )

    if (-not ($actual -match $pattern)) {
        throw "Item did not match. [$actual] did not match pattern [$pattern]"
    }
}

# "Check no match": throws if $actual matches the regex $pattern.
function cnm {
    param(
        $pattern,
        $actual
    )

    if ($actual -match $pattern) {
        throw "Item matched unexpectedly. [$actual] matched pattern [$pattern]"
    }
}

# "Check arrays equal": throws if the two arrays differ in length or if any
# pair of elements at the same index fails the `ce` comparison.
function cae {
    param(
        [object[]] $expected,
        [object[]] $actual
    )

    if ($expected.Length -ne $actual.Length) {
        throw "Expected length $($expected.Length) does not match actual length $($actual.Length)"
    }

    for ($i = 0; $i -lt $actual.Length; $i++) {
        # Pass arguments in ce's declared (expected, actual) order so that a
        # failure message labels the two values correctly.
        ce $expected[$i] $actual[$i]
    }
}

Import-Module "$scriptDir/UniShell.psd1" -force

# Get-UniCodepoint core tests

test 'Get "X" codepoint' {
    $cp = Get-UniCodepoint 'X'
    ce 'X' $cp.RawValue
    ce 'X' $cp.Value
    ce 0x0058 $cp.Codepoint
    ce 'U+0058' $cp.CodepointString
    ce 'LATIN CAPITAL LETTER X' $cp.Name
    ce 'Basic Latin' $cp.Block
    ce '0 - Basic Multilingual Plane' $cp.Plane
    ce '1.1' $cp.UnicodeVersion
    ce 'Latin' $cp.Script
    ce 'AL - Alphabetic' $cp.LineBreakClass
    ce 'Lu - Letter, Uppercase' $cp.Category
    ce '0 - Spacing, split, enclosing, reordrant, and Tibetan subjoined' $cp.CanonicalCombiningClasses
    ce 'L - Left-to-Right' $cp.BidiCategory
    ce $false $cp.Mirrored
    ce 0x0078 $cp.LowercaseMapping
    cae @(0x58) $cp.'utf-8'
    cae @(0x58, 0x00) $cp.'utf-16'
    cae @(0x00, 0x58) $cp.'utf-16BE'
}

test 'Codepoint at unicodedata.txt range start' {
    $cp = Get-UniCodepoint 0x17000
    ce 0x17000 $cp.Codepoint
    ce 'Tangut Ideograph' $cp.Name
    ce '1 - Supplementary Multilingual Plane' $cp.Plane
}

test 'Codepoint at unicodedata.txt range end' {
    $cp = Get-UniCodepoint 0xFFFFD
    ce 0xFFFFD $cp.Codepoint
    ce 'Plane 15 Private Use' $cp.Name
    ce '15 - Supplementary Private Use Area-A' $cp.Plane
}

test 'Codepoint within unicodedata.txt range' {
    $cp = Get-UniCodepoint 0x21000
    ce 0x21000 $cp.Codepoint
    ce 'CJK Ideograph Extension B' $cp.Name
    ce '2 - Supplementary Ideographic Plane' $cp.Plane
}

test 'Unassigned codepoint' {
    $cp = Get-UniCodepoint 0x16E00
    ce 0x16E00 $cp.Codepoint
    ce 'Unassigned' $cp.Name
    ce 'Unassigned' $cp.Block
    ce '1 - Supplementary Multilingual Plane' $cp.Plane
    ce 'Unknown' $cp.Script
    ce 'XX - Unknown' $cp.LineBreakClass
    ce $null $cp.Category
    ce $null $cp.BidiCategory
    ce $null $cp.DecompositionMapping
    ce $null $cp.DecimalDigitValue
    ce $null $cp.DigitValue
    ce $null $cp.NumericValue
    ce $false $cp.Mirrored
    ce $null $cp.UppercaseMapping
    ce $null $cp.LowercaseMapping
    ce $null $cp.TitlecaseMapping
}

test 'Numeric codepoint' {
    $cp = Get-UniCodepoint 0x2181
    ce 0x2181 $cp.Codepoint
    ce 'ROMAN NUMERAL FIVE THOUSAND' $cp.Name
    ce "$([char]0x2181)" $cp.RawValue
    ce "$([char]0x2181)" $cp.Value
    ce 5000 $cp.NumericValue
}

test 'Digit codepoint' {
    $cp = Get-UniCodepoint 0xA8D5
    ce 0xA8D5 $cp.Codepoint
    ce 'SAURASHTRA DIGIT FIVE' $cp.Name
    ce "$([char]0xA8D5)" $cp.RawValue
    ce "$([char]0xA8D5)" $cp.Value
    ce 5 $cp.DecimalDigitValue
    ce 5 $cp.DigitValue
    ce 5 $cp.NumericValue
}

test "Isolated unpaired high surrogate" {
    $cp = Get-UniCodepoint 0xD801
    ce 0xD801 $cp.Codepoint
    ce "$([char]0xD801)" $cp.RawValue
    ce "$([char]0xD801)" $cp.Value
    ce 'High Surrogates' $cp.Block
    ce 'Non Private Use High Surrogate' $cp.Name
    ce 'SG - Surrogate' $cp.LineBreakClass
    ce 'Cs - Other, Surrogate' $cp.Category
}

test "Interpolated unpaired high surrogate" {
    $cp = Get-UniCodepoint "A$([char]0xD801)B"
    ce 3 $cp.length
    ce 0x0041 $cp[0].Codepoint
    ce 0xd801 $cp[1].Codepoint
    ce 0x0042 $cp[2].Codepoint
}

test "Isolated unpaired low surrogate" {
    $cp = Get-UniCodepoint 0xDC01
    ce 0xDC01 $cp.Codepoint
    ce "$([char]0xDC01)" $cp.RawValue
    ce 'Low Surrogates' $cp.Block
    ce 'Low Surrogate' $cp.Name
    ce 'SG - Surrogate' $cp.LineBreakClass
    ce 'Cs - Other, Surrogate' $cp.Category
}

test "Interpolated unpaired low surrogate" {
    $cp = Get-UniCodepoint "A$([char]0xDC01)B"
    ce 3 $cp.length
    ce 0x0041 $cp[0].Codepoint
    ce 0xDC01 $cp[1].Codepoint
    ce 0x0042 $cp[2].Codepoint
}

test "Jumbled isolated surrogates" {
    $hi = [char]0xD802
    $lo = [char]0xDC02
    $cp = Get-UniCodepoint "$lo$lo $hi$hi $lo$hi"
    ce 8 $cp.length
    cae @(0xdc02,0xdc02,0x0020,0xd802,0xd802,0x0020,0xdc02,0xd802) $cp.Codepoint
}

# Get-UniCodepoint formatting tests

test "Combiners for simple latin string" {
    $cp = 'abc' | Get-UniCodepoint
    ce '┌─' $cp[0]._Combiner
    ce '├─' $cp[1]._Combiner
    ce '└─' $cp[2]._Combiner
}

test "Combiners for simple latin single char" {
    $cp = 'a' | Get-UniCodepoint
    ce '──' $cp._Combiner
}

test "Combiners for combined chars at start, more chars after" {
    $cp = "a$([char]0x0301)$([char]0x0307)b" | Get-UniCodepoint
    ce '┌┬' $cp[0]._Combiner
    ce '│├' $cp[1]._Combiner
    ce '│└' $cp[2]._Combiner
}

test "Combiners for combined chars at start, no chars after" {
    $cp = "a$([char]0x0301)$([char]0x0307)" | Get-UniCodepoint
    ce '─┬' $cp[0]._Combiner
    ce ' ├' $cp[1]._Combiner
    ce ' └' $cp[2]._Combiner
}

test "Combiners for combined chars after start, more chars after" {
    $cp = "xa$([char]0x0301)$([char]0x0307)b" | Get-UniCodepoint
    ce '├┬' $cp[1]._Combiner
    ce '│├' $cp[2]._Combiner
    ce '│└' $cp[3]._Combiner
}

test "Combiners for combined chars after start, no chars after" {
    $cp = "xa$([char]0x0301)$([char]0x0307)" | Get-UniCodepoint
    ce '└┬' $cp[1]._Combiner
    ce ' ├' $cp[2]._Combiner
    ce ' └' $cp[3]._Combiner
}

test "per-codepoint display values" {
    $cp = Get-UniCodepoint 0x007f
    ce ([char]0x2421) $cp.Value
    cae @(0x7f, 0x00) $cp.'utf-16'

    $cp = Get-UniCodepoint 0x83
    ce 'NBH' $cp.Value

    $cp = Get-UniCodepoint 0x2066
    ce 'LRI' $cp.Value

    $cp = Get-UniCodepoint 0xFFFB
    ce 'IAT' $cp.Value

    $cp = Get-UniCodepoint 0xE0001
    ce 'LANG TAG' $cp.Value

    $cp = Get-UniCodepoint 0xE0020
    ce "TAG $([char]0x2420)" $cp.Value

    $cp = Get-UniCodepoint 0xE007F
    ce "TAG $([char]0x0018)" $cp.Value
}

test "c0 control display values" {
    $cp = Get-UniCodepoint 0x00
    ce ([char]0x2400) $cp.Value

    $cp = Get-UniCodepoint 0x1f
    ce ([char]0x241f) $cp.Value
}

test "tag control display values" {
    $cp = Get-UniCodepoint 0xE0021
    ce 'Tag !' $cp.Value

    $cp = Get-UniCodepoint 0xE007E
    ce 'Tag ~' $cp.Value
}

test "mongolian free variation selector display values" {
    $cp = Get-UniCodepoint 0x180B
    ce 'FVS1' $cp.Value

    $cp = Get-UniCodepoint 0x180D
    ce 'FVS3' $cp.Value
}

test "variation selector display values" {
    $cp = Get-UniCodepoint 0xFE00
    ce 'VS1' $cp.Value

    $cp = Get-UniCodepoint 0xFE0F
    ce 'VS16' $cp.Value
}

test "supplemental variation selector display values" {
    $cp = Get-UniCodepoint 0xE0100
    ce 'VS17' $cp.Value

    $cp = Get-UniCodepoint 0xE01EF
    ce 'VS256' $cp.Value
}

test "Display value used in table formatting" {
    $output = Get-UniCodepoint 0x00 | Out-String
    cm ([char]0x2400) $output
    cnm ([char]0x00) $output
}

test "Display value used in list formatting" {
    $output = Get-UniCodepoint 0x00 | fl | Out-String
    cm "Value +: $([char]0x2400)" $output
    cnm ([char]0x00) $output
}

test "Bytes formatted as space-delimited hex" {
    $output = "a$([char]0x0322)" | Get-UniCodepoint | Out-string
    cm '\s61 00\s' $output
    cm '\sCC A2\s+22 03\s' $output
}

test "Specified encodings are added to default table output" {
    $output = "a$([char]0x0322)" | Get-UniCodepoint -encoding utf-32, utf-16BE | Out-string
    cm ' 61 00 00 00\s+00 61 ' $output
    cm ' 22 03 00 00 +03 22 ' $output
}

test "Specified encodings are added to default list output" {
    $output = "a$([char]0x0322)" | Get-UniCodepoint -encoding utf-32, utf-16BE | fl | Out-string
    cm '\r?\nutf-32 +: 61 00 00 00\r?\n' $output
    cm '\r?\nutf-16BE +: 00 61\r?\n' $output
}

test "Not-specified encodings are not added to default list output" {
    $output = "a$([char]0x0322)" | Get-UniCodepoint -encoding utf-32 | fl | Out-string
    cnm 'utf-8' $output
    cnm 'utf-16' $output
}

test "Hidden fields are not shown in default list output" {
    $output = "abc" | Get-UniCodepoint | fl | Out-string
    cnm '_Combiner' $output
    cnm '_OriginatingString' $output
}

# Get-UniByte tests

test "Pass a string, default encoding" {
    $b = 'test' | Get-UniByte
    cae @(116, 101, 115, 116) $b
}

test "Pass a string, custom encoding" {
    $b = 'test' | Get-UniByte -E utf-16BE
    cae @(0, 116, 0, 101, 0, 115, 0, 116) $b
}

test "Pass codepoints, default encoding" {
    $b = 'test' | Get-UniCodepoint | Get-UniByte
    cae @(116, 101, 115, 116) $b
}

test "Pass codepoints, custom encoding" {
    $b = 'test' | Get-UniCodepoint | Get-UniByte -E utf-16BE
    cae @(0, 116, 0, 101, 0, 115, 0, 116) $b
}

# Get-UniString tests

test "Simple latin string, pass as bytes" {
    $s = Get-UniString -Bytes 116, 101, 115, 116
    ce 'test' $s
}

test "Simple latin string, pass a codepoints" {
    $s = 116, 101, 115, 116 | Get-UniString
    ce 'test' $s
}

test "Simple latin string, pass as bytes with custom encoding" {
    $s = 116, 0, 101, 0, 115, 0, 116, 0 | Get-UniString -enc utf-16
    ce 'test' $s
}

test "Codepoints larger than byte max" {
    $s = 109, 101, 104, 32, 129335 | Get-UniString
    ce 'meh 🤷' $s

    $s = 'meh 🤷' | Get-UniCodepoint | Get-UniString
    ce 'meh 🤷' $s
}

# module-level tests

test "module import can download data files" {
    $files = @('UnicodeData','DerivedAge','Blocks','Scripts','LineBreak')

    # Delete any existing copies of the data files (errors ignored) so the
    # existence check below can only pass if the re-import recreates them.
    $files |%{ Remove-Item "$scriptDir/$_.txt" -ea 0 }

    # NOTE(review): the meaning of the three module arguments is defined by
    # UniShell itself and is not visible from this script - confirm there.
    Import-Module "$scriptDir/UniShell.psd1" -force -ArgumentList ($scriptDir, @('utf-8'), $true)

    $files |%{
        if(-not (Test-path "$scriptDir/$_.txt")){
            throw "Expected to find file $_.txt downloaded"
        }
    }
}