#Requires -Modules @{ModuleName="Pester"; ModuleVersion="5.0.0"}

Describe 'UseASCII' {

BeforeAll {
    # New-TemporaryFile physically creates an empty *.tmp file, whereas
    # [System.IO.Path]::ChangeExtension only computes a new path string.
    # Keep the unique name, switch it to *.ps1 (so it can be analyzed as a
    # script) and delete the *.tmp placeholder, which would otherwise be
    # left behind in the temp folder on every test run.
    $Placeholder = New-TemporaryFile
    $TemporaryFile = [System.IO.Path]::ChangeExtension($Placeholder.FullName, '.ps1')
    Remove-Item -LiteralPath $Placeholder.FullName -ErrorAction SilentlyContinue
}

Context 'Positives' {

It 'Smart characters' {
    # The script block literal is only a convenient way to write the script text
    # that is handed to the analyzer.
    $ScriptText = { Write-Host 'coöperate' }.ToString()
    $Diagnostics = Invoke-ScriptAnalyzer -ScriptDefinition $ScriptText -CustomRulePath .\UseASCII.psm1

    # The custom rule must report the non-ASCII character as an informational finding.
    $Diagnostics.RuleName | Should -Be 'PSUseASCII'
    $Diagnostics.Severity | Should -Be 'Information'
}

It 'Fix' {
Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
Use ASCII test
The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced

# [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]

Write-Host test –ForegroundColor Red -BackgroundColor Green
Write-Host 'No-break space'
Invoke-ScriptAnalyzer -Fix -CustomRulePath .\UseASCII.psm1 -Path $TemporaryFile
Get-Content -Raw -Literal $TemporaryFile | Should -be {
Use ASCII test
The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
such as "cooperate", without which the <oo> letter sequence could be misinterpreted to be pronounced

# [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'cooperate')]

Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
Write-Host 'No-break space'

It 'Suppress' {
Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
Use ASCII test
The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced

[System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]

Write-Host test –ForegroundColor Red -BackgroundColor Green
Write-Host 'No-break space'
Invoke-ScriptAnalyzer -Fix -CustomRulePath .\UseASCII.psm1 -Path $TemporaryFile -ErrorAction SilentlyContinue
Get-Content -Raw -Literal $TemporaryFile | Should -be {
Use ASCII test
The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced

[System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]

Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
Write-Host 'No-break space'

AfterAll {
    # Remove the temporary script written by the tests so that test runs do not
    # accumulate files in the temp folder. The null check avoids an error from
    # Test-Path when BeforeAll failed before $TemporaryFile was assigned.
    if ($TemporaryFile -and (Test-Path -LiteralPath $TemporaryFile)) {
        Remove-Item -LiteralPath $TemporaryFile
    }
}
# ---------------------------------------------------------------------------
# File: UseASCII.psm1 (new file, 132 lines)
# ---------------------------------------------------------------------------
# This module calls the static [Type]::new(...) constructor syntax, which is
# only available from PowerShell 5.0 onwards; declare that up front so older
# hosts fail with a clear message instead of a runtime method-not-found error.
#Requires -Version 5.0

function Measure-UseASCII {
<#
.SYNOPSIS
    Use ASCII characters
.DESCRIPTION
    Validates that only ASCII characters are used and reveals the position of any violation.
#>

Param (
[Parameter(Mandatory = $true)]
Begin {
# Scans $Text one character at a time and reports every character that is
# greater than DEL (0x7F), i.e. every character outside the 7-bit ASCII range.
# The fields visible for each hit are:
#   Character    - the offending character
#   Offset       - its 0-based index in $Text
#   LineNumber   - line counter (starts at 1)
#   ColumnNumber - column counter (starts at 1, set to 0 on a line feed)
# NOTE(review): in this view the closing braces, the wrapper around the four
# fields and any increments of $LineNumber / $ColumnNumber are not visible -
# confirm against the complete file before relying on these comments.
function GetNonASCIIPositions ([String]$Text) {
# Line feed: the only character treated as a line terminator here.
$LF = [Char]0x0A
# DEL: the highest 7-bit ASCII code point.
$DEL = [Char]0x7F
$LineNumber = 1; $ColumnNumber = 1
for ($Offset = 0; $Offset -lt $Text.Length; $Offset++) {
$Character = $Text[$Offset]
# PowerShell variable names are case-insensitive, so $Lf and $Del below refer
# to $LF and $DEL defined above.
if ($Character -eq $Lf) {
$ColumnNumber = 0
else {
if ($Character -gt $Del) {
Character = $Character
Offset = $Offset
LineNumber = $LineNumber
ColumnNumber = $ColumnNumber
function CharToHex([Char]$Char) {
# Returns the ASCII replacement suggested for the non-ASCII character $Char.
#
# Explicit mappings (Unicode character names):
#   U+00A0 NO-BREAK SPACE                         -> ' '
#   U+1806 MONGOLIAN TODO SOFT HYPHEN             -> '-'
#   U+2010 HYPHEN                                 -> '-'
#   U+2011 NON-BREAKING HYPHEN                    -> '-'
#   U+2012 FIGURE DASH                            -> '-'
#   U+2013 EN DASH                                -> '-'
#   U+2014 EM DASH                                -> '-'
#   U+2015 HORIZONTAL BAR                         -> '-'
#   U+2016 DOUBLE VERTICAL LINE                   -> '-'
#   U+2212 MINUS SIGN                             -> '-'
#   U+2018 LEFT SINGLE QUOTATION MARK             -> "'"
#   U+2019 RIGHT SINGLE QUOTATION MARK            -> "'"
#   U+201A SINGLE LOW-9 QUOTATION MARK            -> "'"
#   U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK  -> "'"
#   U+201C LEFT DOUBLE QUOTATION MARK             -> '"'
#   U+201D RIGHT DOUBLE QUOTATION MARK            -> '"'
#   U+201E DOUBLE LOW-9 QUOTATION MARK            -> '"'
#   U+201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK  -> '"'
#
# NOTE(review): U+2016 is a double vertical line, not a dash; mapping it to '-'
# looks unintended ('|' would be the closer ASCII look-alike) - confirm.
#
# Any other character is decomposed with Unicode normalization form D and the
# first character of the decomposition is returned when it is at or below
# 0x7F (e.g. a base letter without its diacritic); otherwise '_' is returned.
#
# NOTE(review): the closing braces of this function are not visible in this view.
function SuggestedASCII([Char]$Char) {
switch ([Int]$Char) {
0x00A0 { ' ' }
0x1806 { '-' }
0x2010 { '-' }
0x2011 { '-' }
0x2012 { '-' }
0x2013 { '-' }
0x2014 { '-' }
0x2015 { '-' }
0x2016 { '-' }
0x2212 { '-' }
0x2018 { "'" }
0x2019 { "'" }
0x201A { "'" }
0x201B { "'" }
0x201C { '"' }
0x201D { '"' }
0x201E { '"' }
0x201F { '"' }
Default {
$ASCII = $Char.ToString().Normalize([System.text.NormalizationForm]::FormD)[0]
if ($ASCII -le 0x7F) { $ASCII } else { '_' }


# Handles every non-ASCII character found in the text of $ScriptBlockAst and
# assembles the rule name, severity, message and suggested correction for it.
# NOTE(review): several statements in this block are incomplete in this view
# (the ScriptPosition constructor arguments and the container of the
# Message/Extent/RuleName/... fields); the comments below describe only what
# is visible here.
Process {
# As the AST parser, tokenize doesn't capture (smart) quotes
# $Tokens = [System.Management.Automation.PSParser]::Tokenize($ScriptBlockAst.Extent.Text, [ref]$null)
# $Violations = $Tokens.where{ $_.Content -cMatch '[\u0100-\uFFFF]' }
# The raw script text is scanned character by character instead.
$Violations = GetNonASCIIPositions $ScriptBlockAst.Extent.Text
Foreach ($Violation in $Violations) {
$Text = $ScriptBlockAst.Extent.Text
# Walk left from the offending character until a character that does not match
# the regex \w (or the start of the text) is reached.
For ($i = $Violation.Offset - 1; $i -ge 0; $i--) { if ($Text[$i] -NotMatch '\w') { break } }
$Start = $i + 1
# Walk right from the offending character in the same way (or to the end of the text).
For ($i = $Violation.Offset + 1; $i -lt $Text.Length; $i++) { if ($Text[$i] -NotMatch '\w') { break } }
$Length = $i - $Start
# $Word is the run of word characters around the violation; it always contains
# the offending character itself and is used below in the message and as the
# rule suppression id.
$Word = $Text.SubString($Start, $Length)

# Extent of the violation, built from a start and an end script position.
$StartPosition = [System.Management.Automation.Language.ScriptPosition]::new(
$EndPosition = [System.Management.Automation.Language.ScriptPosition]::new(
($Violation.ColumnNumber + 1),
$Extent = [System.Management.Automation.Language.ScriptExtent]::new($StartPosition, $EndPosition)
# Text fragments for the diagnostic: the offending character, its suggested
# ASCII replacement and a "U+..." label for each, formatted by CharToHex
# (whose body is not visible in this view).
$Character = $Violation.Character
$UniCode = "U+$(CharToHex $Character)"
$SuggestedASCII = SuggestedASCII $Character
$AscCode = "U+$(CharToHex $SuggestedASCII)"
Message = "Non-ASCII character $UniCode found in: $Word"
Extent = $Extent
RuleName = 'PSUseASCII'
Severity = 'Information'
RuleSuppressionID = $Word
SuggestedCorrections = [System.Collections.ObjectModel.Collection[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]](
($Violation.ColumnNumber + 1),
"Replace '$Character' ($UniCode) with '$SuggestedASCII' ($AscCode)"
Export-ModuleMember -Function Measure-*

