Skip to content

Commit

Permalink
convert bash's ANSI-C quoted strings to their value
Browse files Browse the repository at this point in the history
  • Loading branch information
boris committed Jul 4, 2021
1 parent 23cb0f3 commit ab6d8b6
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 12 deletions.
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ $ node example.js --foo=99.3
* default: `true`
* key: `parse-positional-numbers`

Should positional keys that look like numbers be treated as such.
Should positional keys that look like numbers be treated as such?

```console
$ node example.js 99.3
Expand Down Expand Up @@ -502,6 +502,28 @@ $ node example.js --unknown-option --known-option 2 --string-option --unknown-op
{ _: ['--unknown-option'], knownOption: 2, stringOption: '--unknown-option2' }
```

### parse bash ANSI-C strings

* default: `false`
* key: `parse-bash-ansi-c-strings`

Should arguments that look like [ANSI-C quoted strings](https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html) (a bash-only feature) be treated as such?

_if disabled:_

```console
> const parser = require('yargs-parser')
> parser("--foo $'hello world'")
{ _: [], foo: "$'hello world'" }
```

_if enabled:_

```console
> parser("--foo $'hello world'", {configuration: {'parse-bash-ansi-c-strings': true}})
{ _: [], foo: 'hello world' }
```

## Supported Node.js Versions

Libraries in this ecosystem make a best effort to track
Expand Down
3 changes: 2 additions & 1 deletion browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// specific libraries, such as "path".
//
// TODO: figure out reasonable web equivalents for "resolve", "normalize", etc.
import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js'
import { YargsParser } from './build/lib/yargs-parser.js'
const parser = new YargsParser({
cwd: () => { return '' },
Expand All @@ -25,5 +25,6 @@ yargsParser.detailed = function (args, opts) {
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
3 changes: 2 additions & 1 deletion deno.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// TODO: find reasonable replacement for require logic.
import * as path from 'https://deno.land/std/path/mod.ts'
import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js'
import { YargsParser } from './build/lib/yargs-parser.js'
import type { Arguments, ArgsInput, Parser, Options, DetailedArguments } from './build/lib/yargs-parser-types.d.ts'

Expand Down Expand Up @@ -34,5 +34,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial<Options>): Deta
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
4 changes: 3 additions & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { format } from 'util'
import { readFileSync } from 'fs'
import { normalize, resolve } from 'path'
import { ArgsInput, Arguments, Parser, Options, DetailedArguments } from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './string-utils.js'
import { YargsParser } from './yargs-parser.js'

// See https://github.com/yargs/yargs-parser#supported-nodejs-versions for our
Expand Down Expand Up @@ -58,4 +58,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial<Options>): Deta
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
70 changes: 70 additions & 0 deletions lib/string-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,73 @@ export function looksLikeNumber (x: null | undefined | number | string): boolean
if (/^0[^.]/.test(x)) return false
return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x)
}

// ANSI-C quoted strings are a bash-only feature and have the form $'some text'
// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
//
// https://git.savannah.gnu.org/cgit/bash.git/tree/lib/sh/strtrans.c
/**
 * Convert a bash ANSI-C quoted string (`$'some text'`) to the value it represents.
 *
 * The first two characters and the last character of `str` are dropped
 * without being validated, so callers are expected to have already checked
 * that `str` starts with `$'` and ends with `'`. Every backslash escape
 * matched by `ANSI_BACKSLASHES` is then replaced by the character it denotes;
 * any other backslash sequence (e.g. `\z`) is left in the output unchanged.
 *
 * @param str - the whole quoted token, including the leading `$'` and the trailing `'`.
 * @returns the unquoted string with recognized escapes decoded.
 * @throws Error if a `\c` control escape is followed by a non-ASCII character.
 * @throws RangeError if a `\x`, `\u` or `\U` escape encodes a code point above 0x10FFFF
 *   (raised by `String.fromCodePoint`).
 */
export function parseAnsiCQuotedString (str: string): string {
  // Replacement callback: `m` is one full match of ANSI_BACKSLASHES, i.e. a
  // backslash followed by the escape body.
  function unescapeChar (m: string): string {
    switch (m.charAt(1)) {
      case '\\':
        return '\\'
      case 'a':
        // Alert (BEL). JavaScript has no '\a' escape: the literal '\a' is just
        // the letter 'a', so the code point must be spelled out explicitly.
        return '\x07'
      case 'b':
        return '\b'
      case 'e':
      case 'E':
        return '\x1B'
      case 'f':
        return '\f'
      case 'n':
        return '\n'
      case 'r':
        return '\r'
      case 't':
        return '\t'
      case 'v':
        return '\v'
      case "'":
        return "'"
      case '"':
        return '"'
      case '?':
        return '?'
      case 'c': {
        // bash handles all characters by considering the first byte
        // of its UTF-8 input and can produce invalid UTF-8, whereas
        // JavaScript stores strings in UTF-16
        const controlled = m.codePointAt(2)!
        if (controlled > 127) {
          throw Error("non-ASCII control character in ANSI-C quoted string: '\\u{" + controlled.toString(16) + "}'")
        }
        // If this produces a 0x00 (null) character, it will cause bash to
        // terminate the string at that character, but we return the null
        // character in the result.
        // '\c?' is special-cased to DEL; everything else keeps only the low
        // five bits of the upper-cased character.
        const ch = m.charAt(2)
        return ch === '?' ? '\x7F' : String.fromCodePoint(ch.toUpperCase().codePointAt(0)! & 0b00011111)
      }
      case 'x':
      case 'u':
      case 'U':
        // Hexadecimal character literal
        // Unlike bash, this will error if the code point is greater than 10FFFF
        return String.fromCodePoint(parseInt(m.slice(2), 16))
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        // Octal character literal, reduced to a single byte value
        return String.fromCodePoint(parseInt(m.slice(1), 8) % 256)
      default:
        // There must be a mis-match between ANSI_BACKSLASHES and the switch statement
        throw Error("Unhandled character in ANSI-C escape code: " + JSON.stringify(m))
    }
  }

  // The `s` flag lets the `.` in `c.` match line terminators as well.
  const ANSI_BACKSLASHES = /\\(\\|a|b|e|E|f|n|r|t|v|'|"|\?|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{1,4}|U[0-9A-Fa-f]{1,8}|c.)/gs
  return str.substring(2, str.length - 1).replace(ANSI_BACKSLASHES, unescapeChar)
}
7 changes: 5 additions & 2 deletions lib/yargs-parser-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ export interface Configuration {
'nargs-eats-options': boolean;
/** The prefix to use for negated boolean variables. Default is `'no-'` */
'negation-prefix': string;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should arguments that look like ANSI-C quoted strings (a bash-only feature) be parsed? Default is `false` */
'parse-bash-ansi-c-strings': boolean;
/** Should keys that look like numbers be treated as such? Default is `true` */
'parse-numbers': boolean;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should unparsed flags be stored in -- or _? Default is `false` */
'populate--': boolean;
/** Should a placeholder be added for keys not set via the corresponding CLI argument? Default is `false` */
Expand Down Expand Up @@ -155,6 +157,7 @@ export interface Parser {
camelCase(str: string): string;
decamelize(str: string, joinString?: string): string;
looksLikeNumber(x: null | undefined | number | string): boolean;
parseAnsiCQuotedString(str: string): string;
}

export type StringFlag = Dictionary<string[]>;
Expand Down
25 changes: 19 additions & 6 deletions lib/yargs-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ import type {
YargsParserMixin
} from './yargs-parser-types.js'
import { DefaultValuesForTypeKey } from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import {
camelCase,
decamelize,
looksLikeNumber,
parseAnsiCQuotedString
} from './string-utils.js'

let mixin: YargsParserMixin
export class YargsParser {
Expand Down Expand Up @@ -75,6 +80,7 @@ export class YargsParser {
'negation-prefix': 'no-',
'parse-numbers': true,
'parse-positional-numbers': true,
'parse-bash-ansi-c-strings': false,
'populate--': false,
'set-placeholder-key': false,
'short-option-groups': true,
Expand Down Expand Up @@ -607,11 +613,18 @@ export class YargsParser {

function processValue (key: string, val: any) {
// strings may be quoted, clean this up as we assign values.
if (typeof val === 'string' &&
(val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
if (typeof val === 'string') {
if ((val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
} else if (configuration['parse-bash-ansi-c-strings'] && val.slice(0, 2) === "$'" && val[val.length - 1] === "'") {
try {
val = parseAnsiCQuotedString(val)
} catch (err) {
error = err
}
}
}

// handle parsing boolean arguments --foo=true --bar false.
Expand Down
110 changes: 110 additions & 0 deletions test/yargs-parser.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -3589,6 +3589,116 @@ describe('yargs-parser', function () {
})
})

// see: https://github.com/yargs/yargs-parser/issues/346
// Tests for the opt-in 'parse-bash-ansi-c-strings' configuration flag, which
// decodes values of the form $'...' via parseAnsiCQuotedString.
describe('ANSI-C quoted strings', () => {
// With no configuration the $'...' token must be passed through verbatim,
// both as a positional argument and as an option value.
it('does not parse ANSI-C quoted strings by default', function () {
const args = parser(["$'\\n'"])
args._[0].should.equal("$'\\n'")
const args2 = parser("--foo $'\\t'")
args2.foo.should.equal("$'\\t'")
})

// Walks through each family of escape: single-character, hex (\x, \u, \U),
// control (\c) and octal.
it('handles bash ANSI-C quoted strings', () => {
const args = parser("--foo $'text with \\n newline'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal('text with \n newline')

// Double quotes shouldn't work
const args2 = parser('--foo $"text without \\n newline"', {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args2.foo.should.equal('$"text without \\n newline"')

// Every single-character escape, concatenated in one string.
const characters = '\\\\' + '\\a' + '\\b' + '\\e' + '\\E' + '\\f' + '\\n' + '\\r' + '\\t' + '\\v' + "\\'" + '\\"' + '\\?'
const args3 = parser("--foo $'" + characters + "'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
// NOTE(review): '\a' is not a JavaScript escape sequence, so the expected
// literal below contains the plain letter 'a' at that position, not BEL
// (0x07), which is what bash produces for \a. Confirm this is intended.
args3.foo.should.equal('\\\a\b\u001b\u001b\f\n\r\t\v\'"?') // eslint-disable-line

// \x consumes at most two hex digits; the remaining 'FF' stays literal.
const args4 = parser("--foo $'text \\xFFFF with \\xFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args4.foo.should.equal('text \u00FFFF with \u00FF hex')
// \u consumes at most four hex digits; the remaining 'FF' stays literal.
const args5 = parser("--foo $'text \\uFFFFFF\\uFFFF with \\uFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args5.foo.should.equal('text \uFFFFFF\uFFFF with \u00FF hex')
// \U consumes up to eight hex digits, so it can reach beyond the BMP.
const args6 = parser("--foo $'text \\U10FFFF\\UFFFF with \\U00FF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
const longCodePoint = String.fromCodePoint(0x10FFFF)
args6.foo.should.equal(`text ${longCodePoint}\uFFFF with \u00FF hex`)

// \c takes exactly one following character and keeps its low five bits
// (after upper-casing): A -> 0x01, z -> 0x1A, '1' -> 0x11, '0' -> 0x10.
// Characters after that one are left as ordinary text.
const args7 = parser("--foo $'text \\cAB \\cz with \\c12 control \\c011 chars'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args7.foo.should.equal('text \u0001B \u001A with \u00112 control \u001011 chars')

// Octal escapes take one to three digits in 0-7, so '\129' is '\12'
// followed by a literal '9'.
const args8 = parser("--foo $'text \\0 \\001 with \\12 \\123 \\129 octal'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args8.foo.should.equal('text \u0000 \u0001 with \u000A \u0053 \u000A9 octal')
})

it('handles edge case characters in control code escapes', () => {
// '\c\' is control-backslash (0x5C & 0x1F = 0x1C); the 't' that follows is
// then plain text rather than part of a '\t' escape.
const args = parser("--foo $'\\c\\t'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal('\x1Ct')

// Check that the regex matches whitespace characters
// (a real newline after \c: 0x0A & 0x1F is still 0x0A)
const args2 = parser("--foo $'\\c\nt'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args2.foo.should.equal('\nt')

// Space is 0x20, whose low five bits are all zero, giving a NUL character.
const args3 = parser("--foo $'\\c '", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args3.foo.should.equal('\x00')

// This is a special case
// ('\c?' maps to DEL, 0x7F, instead of being masked)
const args4 = parser("--foo $'\\c?'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args4.foo.should.equal('\x7F')
})

// The error is reported through the `error` field of the detailed result
// rather than being thrown out of the parser call.
it('throws error for non-ASCII characters in control code escapes', () => {
const args = parser.detailed("--foo $'\\c\u0080'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.error.message.should.match(/non-ASCII control character in ANSI-C quoted/)
})
})

// see: https://github.com/yargs/yargs-parser/issues/144
it('number/string types should use default when no right-hand value', () => {
let argv = parser(['--foo'], {
Expand Down

0 comments on commit ab6d8b6

Please sign in to comment.